In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import itertools

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the simulation snapshots, reduced bases, mass matrix and prior
# statistics from disk, then build the normalised training tensors used by
# the rest of the notebook.

# Import simulation data
u_array = np.load('unp_concat_100.npy')
m_array = np.load('mnp_concat_100.npy')

# Import reduced-order subspaces
AS = np.load('AS_fs_wom.npy')
PCA = np.load('PCA_U_f_100_allstep.npy')

# Import finite element mass matrix
M = np.load('M.npy')

# Import Bayesian prior information
prior_m_precision = np.load('prior_prec.npy')
prior_m_covariance = np.load('prior.npy')
prior_mean = np.load('prior_mean.npy')

# Import reference solution data
obs_mean = np.load('obs_mean.npy')

# Prepare parameter data: centre the parameters on the prior mean
m_array = torch.tensor(m_array)
m_array_ = m_array - prior_mean

# Reduce parameter dimension using active subspace
m_red = m_array_ @ (prior_m_precision @ AS)

# Prepare observation data: centre the states on the reference mean
u_array_ = u_array - obs_mean

# Truncate PCA basis to 129 dimensions
PCA = PCA[:,:129]

# Set number of time steps for training
num_steps = 100

# Create training datasets (the last 100 samples are held out).
# train_s is the reduced state at step 0, output_s the following num_steps steps.
train_m = torch.tensor(m_red.numpy()[:-100], dtype=torch.float32)
train_s = torch.tensor(u_array_[:-100,0,:]@PCA, dtype=torch.float32)
output_s = torch.tensor(u_array_[:-100,1:1+num_steps,:]@PCA, dtype=torch.float32)

# Normalize parameter data (per-component standardisation)
train_m_100 = train_m

mean = torch.mean(train_m_100,dim=0)
std = torch.std(train_m_100,dim=0)

sdata_m = (train_m_100 - mean)/std

# Normalize observation data: only the mean (over samples and steps) is removed
mean_o = output_s.mean(dim = (0,1))
# std_o = output_s.std(dim=(0,1))

# Set observation std to 1 for normalization.
# The template is mean_o (same shape and dtype as the disabled std above):
# referencing std_o here raised NameError on a fresh kernel because its only
# definition is commented out.
std_o = torch.ones_like(mean_o)

sdata_y = (train_s - mean_o)/(std_o)
sdata_y_t = (output_s - mean_o)/(std_o)

# Limit dataset size to 1280 samples
sdata_m_ = sdata_m[:1024+256]
sdata_y_ = sdata_y[:1024+256]
sdata_y_t_ = sdata_y_t[:1024+256]

In [49]:
# Load the model

def load_checkpoint_and_inspect(checkpoint_path, model, map_location=None):
    """Restore model weights from a checkpoint and report its optimizer state.

    Args:
        checkpoint_path: Path of a checkpoint written with ``torch.save``. It
            must contain the keys ``model_state_dict``, ``epoch``,
            ``best_val_loss``, ``num_steps`` and ``patience_counter``.
        model: Module whose ``load_state_dict`` receives the stored weights
            (modified in place).
        map_location: Forwarded to ``torch.load``. Pass e.g. ``'cpu'`` to open
            a checkpoint saved on a GPU from a machine without one. The
            default ``None`` keeps the devices recorded in the file.

    Returns:
        Tuple ``(model, epoch, best_val_loss, num_steps, patience_counter)``.

    Raises:
        KeyError: If one of the required checkpoint keys is missing.
    """
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    epoch = checkpoint['epoch']
    best_val_loss = checkpoint['best_val_loss']
    num_steps = checkpoint['num_steps']
    patience_counter = checkpoint['patience_counter']

    # The optimizer state is only inspected, never restored: callers build a
    # fresh optimizer themselves.
    if 'optimizer_state_dict' in checkpoint:
        print("Optimizer state in checkpoint:")
        print(checkpoint['optimizer_state_dict'].keys())
        for param_group in checkpoint['optimizer_state_dict']['param_groups']:
            print(f"Learning rate: {param_group['lr']}")
            print(f"Parameters: {len(param_group['params'])}")
    else:
        print("No optimizer state found in checkpoint")

    return model, epoch, best_val_loss, num_steps, patience_counter


In [50]:
# Load pre-trained model and checkpoint data.
# StatePredictor is not defined in this cell; it must already be in scope.
checkpoint_path = 'checkpoints/best_model_checkpoint_good_jac.pth'
model = StatePredictor(129, 100, 128, num_steps=num_steps)
# Note: num_steps is overwritten with the value stored in the checkpoint.
model, start_epoch, best_val_loss, num_steps, patience_counter = load_checkpoint_and_inspect(checkpoint_path, model)

# Create a fresh optimizer (the optimizer state in the checkpoint is not restored)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.005)
# Referenced through torch.optim so the cell does not depend on a separate
# `from torch.optim.lr_scheduler import ReduceLROnPlateau` having been run.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5)

Optimizer state in checkpoint:
dict_keys(['state', 'param_groups'])
Learning rate: 0.005
Parameters: 1


In [72]:
# Move normalization statistics to device
std_o = std_o.to(device)
std = std.to(device)
mean_o = mean_o.to(device)
sdata_m = sdata_m.to(device)
PCA_ = torch.tensor(PCA, device=device)
noise_var = 3.9e-3
# Noise precision = mass matrix / noise variance, in double precision.
# M is loaded with np.load, and NumPy arrays have no .double() method;
# torch.as_tensor accepts both an ndarray and a tensor.
Noise_prec = (torch.as_tensor(M, dtype=torch.float64) / noise_var).to(device)

# First normalised initial state repeated 200 times (one row per sample).
# Built with tensor methods rather than torch.tensor(<tensor>), which emits a
# copy-construct UserWarning.
init_y = sdata_y[0].detach().clone().unsqueeze(0).repeat(200, 1).to(device=device, dtype=torch.float32)
# Noise precision projected onto the truncated PCA basis.
UMU = PCA_.T @ Noise_prec @ PCA_
model = model.to(device)

  init_y = torch.tensor(sdata_y[0].clone().detach().unsqueeze(0).repeat(200, 1),


Static design: exhaustive search over 4-of-10 index combinations for the initial design

In [None]:
import itertools
# Candidate indices along axis 1 of the simulated full states.
numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Generate all combinations of 4 elements
combinations = list(itertools.combinations(numbers, 4))

# Search bookkeeping, initialised here so the cell runs on a fresh kernel
# (reading eig_list / max_eig before any assignment raised NameError).
eig_list = []
max_eig = float('-inf')
best_comb = None

# Loop-invariant settings, hoisted out of the combination loop.
num_iterations = 0
learning_rate = 0.005
number_of_samples = 200
# Switch to eval mode before the first forward pass, not only before the
# first optimisation.
model.eval()

for combo in combinations:
    print('current_comb', combo)
    combo_list = list(combo)

    ## Generate prior sample: 200 distinct rows from the first 1024 training parameters
    random_indices = torch.randperm(1024)[:200]
    m_samples = sdata_m[random_indices]
    # Synthetic data only: no autograd graph is needed for this forward pass.
    with torch.no_grad():
        states = model(init_y,m_samples)[0]
        # Undo the normalisation and lift from PCA coordinates to the full state.
        full_states = (states*std_o + mean_o).double() @ PCA_.T
    noise = torch.normal(mean=0, std=np.sqrt(noise_var), size=full_states.shape).to(device)
    full_noisy_states = (noise + full_states).detach()

    ## Precompute the data term y^T Noise_prec U for the selected indices
    yMU = (full_noisy_states[:,combo_list] @ Noise_prec @ PCA_)

    ## MAP point: optimise a copy of the sampled parameters
    L2_lbfgs = []
    m = m_samples.detach().clone().requires_grad_(True)
    optimizer = optim.LBFGS([m],
                       lr=learning_rate,
                       max_iter=150,
                       max_eval=None,
                       tolerance_grad=1e-7,
                       tolerance_change=1e-9,
                       history_size=150,
                       line_search_fn="strong_wolfe")

    def closure():
        """Closure function for L-BFGS optimizer - computes loss and gradients"""
        optimizer.zero_grad()

        # Forward pass through neural network
        states = model(init_y, m)[0]

        # De-normalised predictions in PCA coordinates, every 10th step.
        # NOTE(review): nn_val keeps steps 9, 19, ... while yMU keeps the steps
        # listed in combo_list -- confirm that the two step axes line up.
        nn_val = (states*std_o + mean_o).double()[:,9::10]

        # Efficient likelihood computation using precomputed matrices.
        # UMU (= PCA_.T @ Noise_prec @ PCA_) replaces the undefined name
        # UNoise_precU; it is the same matrix used for the Gauss-Newton term below.
        like1 = torch.einsum('bij,bij->b',nn_val,yMU)
        like2 = torch.einsum('bij,jk,bik->b', nn_val, UMU, nn_val)
        like = (-2*like1 + like2)/2.

        # Prior regularization term
        prior = torch.einsum('ij,ij->i', m, m) /2.

        # Total loss (negative log posterior up to a constant), averaged over the batch
        loss = (prior + like).mean()

        # Compute gradients
        loss.backward()
        return loss

    # Optimization loop (one L-BFGS step call, which runs up to max_iter iterations)
    for epoch in range(1):
        loss = optimizer.step(closure)

        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        # Store optimization metrics
        L2_lbfgs.append(loss.item())

    ## Compute Jacobian at the Map point
    # compute_jacobian_batched is not defined in this cell; the einsum below
    # treats its result as a 4-D tensor.
    jacobian_func = compute_jacobian_batched(model, init_y, m.detach())
    with torch.no_grad():
        jacobian_func_t = jacobian_func.transpose(2, 3)
        res = torch.einsum('abij,jk,abkl->abil', jacobian_func_t.double(), UMU, jacobian_func.double())

        ## Compute the JTNJ
        # NOTE(review): the mean runs over the whole second axis and does not
        # depend on combo_list -- confirm whether it should be restricted to
        # the selected indices.
        res1= res.mean(dim=1)
        eigenvalues = torch.linalg.eigvalsh((res1))

        # Per-sample criterion: sum log(1+lambda) - sum lambda/(1+lambda) + ||m_MAP||^2
        l1 = torch.log(1+ eigenvalues).sum(dim=1)
        l2 = (-eigenvalues/(1+eigenvalues)).sum(dim=1)
        map_prior = m.detach()
        l3 = torch.sum(map_prior * map_prior, dim=1)

        beig = l1+l2+l3
        cur_eig = beig.mean().detach()
        eig_list.append(cur_eig)
        if cur_eig > max_eig:
            print('max_eig', cur_eig)
            max_eig = cur_eig
            best_comb = combo
            print("best_comb", combo)

Applying Algorithm 1

In [73]:
# History and running maximum of the design criterion; the search cells
# append cur_eig values to eig_list and compare them against max_eig.
eig_list = list()
max_eig = -1

# Length-N buffers, a results dictionary and a counter.
# NOTE(review): xi_star, dy_star, Y_real and i are not read by the visible
# cells -- presumably bookkeeping for Algorithm 1; confirm they are still needed.
N = 10
i = 0
xi_star = np.zeros(shape=N, dtype=int)
dy_star = np.zeros(shape=N)
Y_real = dict()

# Standard deviation of the observation noise, from the variance set earlier.
noise_std = np.sqrt(noise_var)

In [81]:
# Generate true observation

# Pick one test case at random (index in [0, 100)).
random_indices_true = torch.randint(0, 100, (1,))
print(random_indices_true)
# random_indices_true = 84

# The surrogate is evaluated on the CPU for this single test case.
# test_t_in_r and m_test_r are not defined in this cell and must be in scope.
model = model.to('cpu')
with torch.no_grad():
    states_true = model(test_t_in_r[random_indices_true], m_test_r[random_indices_true])[0].double()

    # Undo the normalisation and lift to the full state in one batched matmul
    # (same result as stacking the per-sample products).
    adjusted_states_true = ((states_true * std_o.to('cpu') + mean_o.to('cpu')) @ PCA_.to('cpu').T).detach()

# Put the model back on the working device: the following cells feed it
# tensors that live on `device`.
model = model.to(device)

noise = torch.normal(mean=0, std=noise_std, size=adjusted_states_true.shape)
full_noisy_true_states = (noise + adjusted_states_true).detach().to(device)

In [86]:
def generate_sample_post(post_map, post_eigval, post_eigvec, device=None, num_samples=200):
    """Draw Gaussian samples around ``post_map`` from an eigen-decomposition.

    Each sample is ``post_map + z @ (I - V diag(s) V^T)`` with
    ``s = 1 - 1/sqrt(post_eigval + 1)`` and ``z`` standard normal. For
    orthonormal columns ``V`` the matrix equals ``(I + V diag(eigval) V^T)^(-1/2)``.

    Args:
        post_map: Centre of the samples; must broadcast against
            ``(num_samples, dim)``.
        post_eigval: 1-D tensor of eigenvalues.
        post_eigvec: ``(dim, len(post_eigval))`` tensor whose columns are the
            matching eigenvectors.
        device: Device to sample on. ``None`` (default) uses ``post_map.device``,
            so defining the function no longer needs a global ``device``.
        num_samples: Number of samples to draw (default 200).

    Returns:
        ``(num_samples, dim)`` float64 tensor of samples.
    """
    if device is None:
        device = post_map.device

    eigval = post_eigval.to(device=device, dtype=torch.float64)
    eigvec = post_eigvec.to(device=device, dtype=torch.float64)
    s = 1 - 1 / torch.sqrt(eigval + 1)

    # Identity sized from the eigenvector rows so a truncated basis also works.
    dim = eigvec.shape[0]
    transform = torch.eye(dim, dtype=torch.float64, device=device) - eigvec @ torch.diag(s) @ eigvec.T

    # Standard-normal draws: torch.rand is uniform on [0, 1) and would give
    # biased, non-Gaussian samples.
    z = torch.randn(num_samples, dim, dtype=torch.float64, device=device)
    return post_map.to(device) + z @ transform

In [None]:
# Candidate indices, the first index still open for selection, and the
# indices already fixed.
numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
next_time_to_check = 2
best_indices = [1]

cur_eig = 0
max_eig = 0

# Initialize true_obs with the correct shape
true_obs = torch.zeros_like(full_noisy_true_states).to(device)
# Record the real measurement at the indices that are already fixed, the same
# way the sequential loop does for each newly selected index. Without this the
# loop would overwrite the simulated data at these indices with zeros.
true_obs[0, best_indices] = full_noisy_true_states[0, best_indices]

# Single parameter vector, starting at zero, and a single-row initial state.
m1 = nn.Parameter(torch.zeros((1,128)).to(device))
init_y1 = init_y[0].unsqueeze(0)
num_iterations = 0
learning_rate = 0.005
number_of_samples = 200

# Loss history for this solve (previously relied on a list left by another cell).
L2_lbfgs = []

model.eval()

optimizer = optim.LBFGS([m1],
                       lr=learning_rate,
                       max_iter=150,
                       max_eval=None,
                       tolerance_grad=1e-7,
                       tolerance_change=1e-9,
                       history_size=150,
                       line_search_fn="strong_wolfe")

# The closure and the loop below were indented under no enclosing block, which
# is an IndentationError; they now sit at cell level.
def closure():
    """Closure function for L-BFGS optimizer - computes loss and gradients"""
    optimizer.zero_grad()

    # Forward pass through neural network. init_y1 (one row) matches the
    # single-row m1 and the Jacobian call below; the 200-row init_y did not.
    states = model(init_y1, m1)[0]

    # De-normalised predictions in PCA coordinates, every 10th step
    nn_val = (states*std_o + mean_o).double()[:,9::10]

    # Efficient likelihood computation using precomputed matrices.
    # NOTE(review): yMU is not computed in this cell; the closure reads whatever
    # an earlier cell left in the kernel. Presumably it should come from the
    # real observation, e.g. true_obs[:, best_indices] @ Noise_prec @ PCA_,
    # with nn_val restricted to the same indices -- confirm.
    # UMU (= PCA_.T @ Noise_prec @ PCA_) replaces the undefined name UNoise_precU.
    like1 = torch.einsum('bij,bij->b',nn_val,yMU)
    like2 = torch.einsum('bij,jk,bik->b', nn_val, UMU, nn_val)
    like = (-2*like1 + like2)/2.

    # Prior regularization term
    prior = torch.einsum('ij,ij->i', m1, m1) /2.

    # Total loss (negative log posterior up to a constant)
    loss = (prior + like).mean()

    # Compute gradients
    loss.backward()
    return loss

# Optimization loop (one L-BFGS step call, which runs up to max_iter iterations)
for epoch in range(1):
    loss = optimizer.step(closure)

    print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

    # Store optimization metrics
    L2_lbfgs.append(loss.item())

# compute_jacobian_batched is not defined in this cell; the einsum below
# treats its result as a 4-D tensor.
jacobian_func1 = compute_jacobian_batched(model, init_y1, m1.detach())
with torch.no_grad():
    jacobian_func1_t = jacobian_func1.transpose(2, 3)
    res_t2 = torch.einsum('abij,jk,abkl->abil', jacobian_func1_t.double(), UMU, jacobian_func1.double())

    ## Compute the JTNJ, averaged over the fixed indices of the single sample
    gn_h = res_t2[0,best_indices].mean(dim=0)

    # eigh returns ascending eigenvalues; flip both outputs to descending order.
    eigvalues, eigenectors  = torch.linalg.eigh((gn_h))
    eigvalues = eigvalues.flip(0)
    eigenectors = eigenectors.flip(1)

post_map, post_eigval, post_eigvec = m1.detach(), eigvalues, eigenectors

# Loop-invariant settings, hoisted out of the loops.
num_iterations = 0
learning_rate = 0.005
number_of_samples = 200
# Eval mode must be active before the first forward pass.
model.eval()

# Greedy sequential design: at stage k, best_indices holds the indices already
# fixed; every completion to 4 indices is scored and the first new index of
# the best completion is committed.
# NOTE(review): range(1,3) commits two further indices -- confirm whether the
# fourth one should be committed as well.
# NOTE(review): samples are still drawn from the training set; post_map /
# post_eigval / post_eigvec and generate_sample_post are not used here.
for k in range(1,3):
    max_eig = float('-inf')
    bestIndex = -1
    best_comb = None

    # Generate all combinations of the remaining 4-k elements
    combinations = list(itertools.combinations(numbers[next_time_to_check:], 4-k))

    for combo in combinations:
        print('current_comb', combo)
        combo_list = best_indices + list(combo)
        ## Generate prior sample: 200 distinct rows from the first 1024 training parameters
        random_indices = torch.randperm(1024)[:200]
        m_samples = sdata_m[random_indices]
        # Synthetic data only: no autograd graph is needed for this forward pass.
        with torch.no_grad():
            states = model(init_y,m_samples)[0]
            full_states = (states*std_o + mean_o).double() @ PCA_.T
        noise = torch.normal(mean=0, std=np.sqrt(noise_var), size=full_states.shape).to(device)
        full_noisy_states = (noise + full_states).detach()

        # At the indices already fixed, replace the simulated data with the
        # recorded real observation (broadcast over the 200 samples).
        if best_indices:
            full_noisy_states[:, best_indices] = true_obs[:, best_indices]

        ## Precompute the data term y^T Noise_prec U for the selected indices
        yMU = (full_noisy_states[:,combo_list] @ Noise_prec @ PCA_)

        ## MAP point: optimise a fresh copy of this combination's samples.
        # The optimizer used to target a stale `m` left over from another cell.
        m = m_samples.detach().clone().requires_grad_(True)
        optimizer = optim.LBFGS([m],
                       lr=learning_rate,
                       max_iter=150,
                       max_eval=None,
                       tolerance_grad=1e-7,
                       tolerance_change=1e-9,
                       history_size=150,
                       line_search_fn="strong_wolfe")

        # Storage for optimization history
        L2_lbfgs = []

        def closure():
            """Closure function for L-BFGS optimizer - computes loss and gradients"""
            optimizer.zero_grad()

            # Forward pass through neural network
            states = model(init_y, m)[0]

            # De-normalised predictions in PCA coordinates, every 10th step.
            # NOTE(review): nn_val keeps steps 9, 19, ... while yMU keeps the
            # steps listed in combo_list -- confirm the two step axes line up.
            nn_val = (states*std_o + mean_o).double()[:,9::10]

            # Efficient likelihood computation using precomputed matrices.
            # UMU (= PCA_.T @ Noise_prec @ PCA_) replaces the undefined name
            # UNoise_precU.
            like1 = torch.einsum('bij,bij->b',nn_val,yMU)
            like2 = torch.einsum('bij,jk,bik->b', nn_val, UMU, nn_val)
            like = (-2*like1 + like2)/2.

            # Prior regularization term
            prior = torch.einsum('ij,ij->i', m, m) /2.

            # Total loss (negative log posterior up to a constant), batch mean
            loss = (prior + like).mean()

            # Compute gradients
            loss.backward()
            return loss

        # Optimization loop (one L-BFGS step call, up to max_iter iterations)
        for epoch in range(1):
            loss = optimizer.step(closure)

            print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

            # Store optimization metrics
            L2_lbfgs.append(loss.item())

        ## Compute Jacobian at the Map point
        # compute_jacobian_batched is not defined in this cell.
        jacobian_func = compute_jacobian_batched(model, init_y, m.detach())
        with torch.no_grad():
            jacobian_func_t = jacobian_func.transpose(2, 3)
            res = torch.einsum('abij,jk,abkl->abil', jacobian_func_t.double(), UMU, jacobian_func.double())

            ## Compute the JTNJ
            # NOTE(review): the mean runs over the whole second axis and does
            # not depend on combo_list -- confirm.
            res1= res.mean(dim=1)
            eigenvalues = torch.linalg.eigvalsh((res1))

            l1 = torch.log(1+ eigenvalues).sum(dim=1)
            l2 = (-eigenvalues/(1+eigenvalues)).sum(dim=1)
            map_prior = m.detach()
            l3 = torch.sum(map_prior * map_prior, dim=1)

            beig = l1+l2+l3
            cur_eig = beig.mean().detach()
            eig_list.append(cur_eig)
            print(cur_eig)
            if cur_eig > max_eig:
                print('max_eig', cur_eig)
                max_eig = cur_eig
                best_comb = combo
                print("best_comb", combo)

    # Nothing to commit when too few candidate indices remain.
    if best_comb is None:
        print("No candidate combinations left at stage", k)
        break

    # Commit the first new index of the best combination. The old combo[k]
    # read the last combination tried and ran out of range once the
    # combinations became shorter than k+1.
    bestIndex = best_comb[0]
    best_indices.append(bestIndex)
    next_time_to_check = bestIndex + 1
    true_obs[0, bestIndex] = full_noisy_true_states[0, bestIndex]
    print(true_obs)
    print("Done")
    print("bestIndex",bestIndex)