In [1]:
import sys
import os

# Add the src directory to Python path so model.py can find ssn and net modules
sys.path.append(os.path.abspath('../src'))

from src.model import model
from src.model_outerweights import model_outerweights
from src.greedy_insertion import insertion

import numpy as np
from loguru import logger
import torch

In [2]:
# Load the raw dataset from disk.
# The recorded output of this cell shows a structured array with fields
# 'x' (2 floats), 'dv' (2 floats) and 'v' (1 float) per sample.
path = '../data_result/raw_data/VDP_beta_0.1_grid_combined.npy'  # relative to the notebook's working directory
data = np.load(path)
logger.info(f"Loaded data with shape: {data.shape}, dtype: {data.dtype}")

[32m2025-08-04 11:48:48.377[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mLoaded data with shape: (1800,), dtype: [('x', '<f8', (2,)), ('dv', '<f8', (2,)), ('v', '<f8')][0m


In [3]:
# Hyperparameters shared by the cells below.
power = 2  # third positional argument of both model constructors -- exact meaning is defined in src.model (TODO confirm)
gamma = 5.0  # first entry of `regularization`; reported as gamma by the SSN optimizer log
M = 100 # neurons requested from each greedy insertion() call; also the size of the initial random layer
alpha = 1e-5  # second entry of `regularization`; also passed directly to insertion()
regularization = (gamma, alpha) 
num_iterations = 20  # the main loop runs num_iterations - 1 times (the initial Adam fit is done beforehand)
loss_weights = (1.0, 1.0)  # forwarded to both models; presumably weights of the loss terms -- TODO confirm
pruning_threshold = 1e-3  # outer weights with absolute value below this are counted as "small" in the main loop

In [4]:
# Build the two trainers used in the alternating scheme below.
# torch.relu is passed as the second argument (presumably the activation function -- TODO confirm).
# model_1: network from src.model, configured with the Adam optimizer.
model_1 = model(data, torch.relu, power, regularization, optimizer='Adam', loss_weights = loss_weights)
# model_2: variant from src.model_outerweights, configured with the SSN optimizer
# (its log output describes it as training the outer weights only).
model_2 = model_outerweights(data, torch.relu, power, regularization, optimizer='SSN', loss_weights = loss_weights)

[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36m_configure_logger[0m:[36m82[0m - [1mVDPModel initialized[0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:[36m_configure_logger[0m:[36m82[0m - [1mVDPModel (outer weights) initialized[0m


In [5]:
# Set up the initial inner weights and biases for the first M neurons.
# A dedicated, seeded generator makes the starting point identical on every
# Restart & Run All, so training results (and optimizer failures) can be
# reproduced. A local Generator is used instead of np.random.seed() so that
# no global random state is altered.
SEED = 0
rng = np.random.default_rng(SEED)
init_weights = rng.standard_normal((M, 2)) * 0.1  # shape (M, 2): normal draws scaled by 0.1
init_bias = rng.standard_normal(M)  # shape (M,): standard normal draws

In [6]:
# Initial fit with model_1, starting from the random inner weights and biases.
# The four returned values seed the alternating loop in the next cell.
model_result, weight_raw, bias_raw, outerweight_raw = model_1.train(
    inner_weights=init_weights,
    inner_bias=init_bias,
    iterations=5000,
    display_every=1000,
)

logger.info("Initialization done")
logger.info(f"Initial weights shape: {weight_raw.shape}, bias shape: {bias_raw.shape}")

[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36mtrain[0m:[36m245[0m - [1mStarting network training session[0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36m_prepare_data[0m:[36m126[0m - [1mTraining set: 1620 samples, Validation set: 180 samples[0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36m_prepare_data[0m:[36m129[0m - [1mData ranges - x: [-3.00, 3.00], v: [0.00, 10.96], dv: [-13.19, 13.19][0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36m_create_network[0m:[36m162[0m - [1mCreating network with 100 neurons[0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36m_setup_optimizer[0m:[36m180[0m - [1mUsing Adam optimizer with lr=0.0001[0m
[32m2025-08-04 11:48:48[0m | [1mINFO    [0m | [36msrc.model[0m:[36mtrain[0m:[36m263[0m - [1mTraining model, saving to /Users/ruizhechao/Documents/NNforHJB/train_history[0m
[32m2025-08-0

In [8]:
# Main loop: refine with model_2 (SSN), insert new neurons greedily, then
# retrain with model_1 (Adam). Each pass consumes the weight_raw / bias_raw /
# outerweight_raw produced by the previous model_1.train() call.
for i in range(num_iterations - 1):
    logger.info(f"Iteration {i} - current weights shape: {weight_raw.shape}, current bias shape: {bias_raw.shape}")

    # The returned network is bound to `model_ssn`, not `model`, so that the
    # `model` class imported from src.model is not overwritten in the kernel
    # (otherwise re-running the model construction cell would fail).
    model_ssn, weight, bias, outerweights = model_2.train(
        inner_weights=weight_raw,
        inner_bias=bias_raw,
        outer_weights=outerweight_raw,
    )

    # Flatten both sets of outer weights and count the entries whose absolute
    # value is below the threshold, before and after the model_2 step
    outerweights_raw = outerweight_raw.flatten()
    outerweights = outerweights.flatten()
    small_weights_count = np.sum(np.abs(outerweights_raw) < pruning_threshold)
    small_weights_filtered_count = np.sum(np.abs(outerweights) < pruning_threshold)

    # Report each count against the model it was actually computed from
    logger.info(
        f"1st model weights shape: {np.shape(outerweight_raw)}, "
        f"2nd model weights shape: {np.shape(outerweights)}, "
        f"outer weights with abs value < {pruning_threshold}: "
        f"1st model {small_weights_count}, 2nd model {small_weights_filtered_count}"
    )

    # insert M neurons
    # NOTE(review): insertion() is given model_result (the latest model_1
    # network), not the network returned by model_2.train() -- confirm this is intended.
    weight_temp, bias_temp = insertion(data, model_result, M, alpha)
    # weight_temp and bias_temp are already numpy arrays from insertion()
    weights = np.concatenate((weight, weight_temp), axis=0)
    biases = np.concatenate((bias, bias_temp), axis=0)
    logger.info(f"Iteration {i} - inserted weights shape: {weight_temp.shape}, inserted bias shape: {bias_temp.shape}")

    # Retrain the enlarged network with model_1 (Adam); the next pass then
    # accelerates it with model_2 (SSN)
    model_result, weight_raw, bias_raw, outerweight_raw = model_1.train(inner_weights=weights, inner_bias=biases)
    

[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mIteration 0 - current weights shape: (100, 2), current bias shape: (100,)[0m
[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:[36mtrain[0m:[36m245[0m - [1mStarting network training session (outer weights only)[0m
[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:[36m_prepare_data[0m:[36m126[0m - [1mTraining set: 1620 samples, Validation set: 180 samples[0m
[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:[36m_prepare_data[0m:[36m129[0m - [1mData ranges - x: [-3.00, 3.00], v: [0.00, 10.96], dv: [-13.19, 13.19][0m
[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:[36m_setup_optimizer[0m:[36m169[0m - [1mUsing SSN optimizer with alpha=1e-05, gamma=5.0[0m
[32m2025-08-04 11:49:37[0m | [1mINFO    [0m | [36msrc.model_outerweights[0m:

RuntimeError: SSN optimizer failed repeatedly - stopping to prevent infinite loop

The problem is that the line search always fails. Observe the variable `loss_new` in the log below: after damping the parameter, the loss does not decrease, and eventually the error above is raised.


2025-08-04 10:18:08 | DEBUG    | ssn:step:259 - **Solution (dq) norm: 2.743474e-11**  
2025-08-04 10:18:08 | DEBUG    | ssn:step:277 - Initial theta0: 6.042138e+10, step norm: 2.743474e-11
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 0: theta=6.04e+10, **loss_new=4.355797e+00**   
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 1: theta=1.51e+10, **loss_new=4.355887e+00**    
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 2: theta=3.78e+09, **loss_new=4.355887e+00**  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 3: theta=9.44e+08, **loss_new=4.355887e+00**  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 4: theta=2.36e+08, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 5: theta=5.90e+07, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 6: theta=1.48e+07, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 7: theta=3.69e+06, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 8: theta=9.22e+05, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 9: theta=2.30e+05, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 20: theta=5.50e-02, loss_new=4.355887e+00  
2025-08-04 10:18:08 | DEBUG    | ssn:step:308 - Damping step 40: theta=5.00e-14, loss_new=4.355887e+00  
2025-08-04 10:18:08 | WARNING  | ssn:step:304 - Theta reached 7.8e-16. Breaking line search loop.  
2025-08-04 10:18:08 | INFO     | src.model_outerweights:train:308 - Epoch 0: Train Loss = 2.915392, Val Loss = 6.527839  
2025-08-04 10:18:08 | DEBUG    | ssn:step:210 - Initial loss: 4.355797e+00, penalty: 9.064044e-05  
2025-08-04 10:18:08 | DEBUG    | ssn:_transform_param2q:177 - Gradient computed successfully, norm: 2.274264e+01  
2025-08-04 10:18:08 | DEBUG    | ssn:_Hessian:152 - Using correct Gauss-Newton Hessian: S^T*S*DPc, S shape: torch.Size([1620, 100])  