In [None]:
import pandas as pd
import numpy as np
import sys
import GPUtil
import matplotlib.pyplot as plt
import torch
from sklearn.preprocessing import Normalizer
import joblib
import os 
import ili
from ili.dataloaders import NumpyLoader
from ili.inference import InferenceRunner
import h5py

# Import your custom modules
sys.path.append("/disk/xray15/aem2/camels/proj2")
from setup_params_1P import plot_uvlf, plot_colour
from setup_params_SB import *
from priors_SB import initialise_priors_SB28_splitGPU
from variables_config_28 import uvlf_limits, n_bins_lf, colour_limits, n_bins_colour
# Module-level switches for which summary statistics are in play
# (galaxy colours and/or luminosity functions).
# NOTE(review): main() assigns its own local `colours` / `luminosity_functions`
# with the same values, so these module-level copies look redundant — confirm
# whether any other cell reads them.
colours = False
luminosity_functions = True

In [None]:


def setup_gpus(minimum_memory=2):
    """Return the IDs of GPUs that qualify for training.

    Candidates come from ``GPUtil.getAvailable`` (at most 100 devices, ordered
    by memory, with ``maxLoad`` and ``maxMemory`` both 0.5). A candidate is
    kept when the ``total_memory`` reported by
    ``torch.cuda.get_device_properties`` is at least ``minimum_memory`` GB
    (1 GB is taken as 1e9 bytes).

    Side effect: when at least one GPU qualifies, ``CUDA_VISIBLE_DEVICES`` is
    set to the first qualifying ID.

    Args:
        minimum_memory: Minimum total device memory in GB.

    Returns:
        List of qualifying GPU IDs in the order GPUtil returned them, or an
        empty list (after printing a CPU-fallback message) when none qualify.
    """
    candidate_ids = GPUtil.getAvailable(order="memory", limit=100, maxLoad=0.5, maxMemory=0.5)

    available_gpus = []
    for gpu in candidate_ids:
        # NOTE(review): total_memory is the card's capacity, not its currently
        # free memory, despite the variable name — confirm this is intended.
        available_memory = torch.cuda.get_device_properties(gpu).total_memory / 1e9
        if not (available_memory >= minimum_memory):
            continue
        available_gpus.append(gpu)
        print(f"GPU {gpu}: {available_memory:.2f} GB memory - Available for training")

    if len(available_gpus) == 0:
        print("No suitable GPUs found - defaulting to CPU")
        return []

    # NOTE(review): CUDA has already been queried above, so this presumably
    # only affects child processes; callers still address the device by its
    # original index (cuda:<id>) — verify the two stay consistent.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(available_gpus[0])
    return available_gpus

def create_distributed_networks(hidden_features, num_transforms, num_nets, available_gpus, x_sample, theta_sample, primary_device):
    """Build ``num_nets`` NPE networks and place each on ``primary_device``.

    A single builder is obtained from ``ili.utils.load_nde_sbi`` (engine
    "NPE", model "nsf") and called once per ensemble member with the sample
    batches, after which the result is moved with ``.to(primary_device)``.

    Args:
        hidden_features: Forwarded to ``load_nde_sbi``.
        num_transforms: Forwarded to ``load_nde_sbi``.
        num_nets: Number of networks to create.
        available_gpus: Accepted for interface compatibility; not used here.
        x_sample: Batch passed to the builder as ``batch_x``.
        theta_sample: Batch passed to the builder as ``batch_theta``.
        primary_device: Device every network is moved to.

    Returns:
        List of the ``num_nets`` created networks.
    """
    print(f"Creating neural network ensemble on device {primary_device}")

    # One shared builder; every ensemble member is an independent call to it.
    # NOTE(review): assumes the builder returns an object with .to(); confirm
    # that the runner accepts pre-built networks rather than builder callables.
    net_builder = ili.utils.load_nde_sbi(
        engine="NPE",
        model="nsf",
        hidden_features=hidden_features,
        num_transforms=num_transforms,
    )

    nets = []
    for i in range(num_nets):
        nets.append(
            net_builder(batch_theta=theta_sample, batch_x=x_sample).to(primary_device)
        )
        print(f"Created network {i+1}/{num_nets} on {primary_device}")

    return nets
def initialize_training(hidden_features, num_transforms, train_args, prior, model_out_dir, name, x_all, theta, train_mask):
    """Create the network ensemble and the inference runner on one device.

    The device is ``cuda:<id>`` for the first GPU returned by ``setup_gpus``
    (minimum 2 GB), otherwise ``cpu``. The first 32 training rows of ``x_all``
    and ``theta`` are moved to that device and used as the sample batches for
    building a four-network ensemble.

    Args:
        hidden_features: Forwarded to ``create_distributed_networks``.
        num_transforms: Forwarded to ``create_distributed_networks``.
        train_args: Training arguments forwarded to ``InferenceRunner.load``.
        prior: Prior distribution for the runner. It is moved to the selected
            device only if it exposes a callable ``.to``; otherwise it is used
            as given and is expected to already live on the right device.
        model_out_dir: Output directory forwarded as ``out_dir``.
        name: Run name forwarded to the runner.
        x_all: Data tensor, indexed by ``train_mask``.
        theta: Parameter tensor, indexed by ``train_mask``.
        train_mask: Mask selecting the training rows.

    Returns:
        Tuple ``(runner, nets)``.
    """
    # Pick the primary device.
    available_gpus = setup_gpus(minimum_memory=2)
    primary_device = f'cuda:{available_gpus[0]}' if available_gpus else 'cpu'
    print(f"Using {primary_device} as primary device")

    # Small sample batches used only to build the networks.
    x_sample = x_all[train_mask][:32].to(primary_device)
    theta_sample = theta[train_mask][:32].to(primary_device)

    nets = create_distributed_networks(
        hidden_features=hidden_features,
        num_transforms=num_transforms,
        num_nets=4,
        available_gpus=available_gpus,
        x_sample=x_sample,
        theta_sample=theta_sample,
        primary_device=primary_device
    )

    # torch Distribution objects (e.g. sbi's MultipleIndependent) have no
    # .to(), so calling it unconditionally raises AttributeError. Only move
    # priors that actually support it.
    move_prior = getattr(prior, "to", None)
    if callable(move_prior):
        prior = move_prior(primary_device)

    runner = InferenceRunner.load(
        backend="sbi",
        engine="NPE",
        prior=prior,
        nets=nets,
        device=primary_device,
        train_args=train_args,
        proposal=None,
        out_dir=model_out_dir,
        name=name
    )

    return runner, nets

def main():
    """Train the NPE ensemble on the SB28 data and return the result.

    Steps: choose output directories from the ``colours`` /
    ``luminosity_functions`` flags, load the parameter table and the
    ``(theta, x)`` data, pick the primary device, build the prior on it,
    convert the data to float32 tensors on that device, split train/test with
    ``create_test_mask()``, then build and run the inference runner on the
    training rows.

    Returns:
        Tuple ``(posterior_ensemble, summaries)`` as returned by the runner.

    Raises:
        ValueError: If both ``colours`` and ``luminosity_functions`` are False.
        Exception: Any error raised during set-up or training is printed and
            then re-raised unchanged.
    """
    # Basic configuration
    model = "IllustrisTNG"
    spec_type = "attenuated"
    sps = "BC03"
    snap = ["044"]  # NOTE(review): not used anywhere in this function
    bands = "all"

    colours = False
    luminosity_functions = True

    # Unique name for this run
    name = f"{model}_{bands}_{sps}_{spec_type}_{n_bins_lf}_{n_bins_colour}"

    # CAMELS simulation interface.
    # NOTE(review): `cam` is not used again in this function; kept because
    # constructing it may have side effects — confirm.
    cam = camels(model=model, sim_set='SB28')

    # Output directories depend on which summary statistics are enabled
    if colours and not luminosity_functions:
        model_out_dir = "/disk/xray15/aem2/data/28pams/IllustrisTNG/SB/models/colours_only/"
        plots_out_dir = "/disk/xray15/aem2/plots/28pams/IllustrisTNG/SB/test/sbi_plots/colours_only/"
    elif luminosity_functions and not colours:
        model_out_dir = "/disk/xray15/aem2/data/28pams/IllustrisTNG/SB/models/lf_only/"
        plots_out_dir = "/disk/xray15/aem2/plots/28pams/IllustrisTNG/SB/test/sbi_plots/lfs_only/"
    elif colours and luminosity_functions:
        model_out_dir = "/disk/xray15/aem2/data/28pams/IllustrisTNG/SB/models/colours_lfs/"
        plots_out_dir = "/disk/xray15/aem2/plots/28pams/IllustrisTNG/SB/test/sbi_plots/colours_lfs/"
    else:
        raise ValueError("At least one of colours or luminosity_functions must be True")

    print(f"Saving model in {model_out_dir}")
    print(f"Saving plots in {plots_out_dir}")

    # Load parameter table and simulation data
    df_info = pd.read_csv("/disk/xray15/aem2/data/28pams/Info_IllustrisTNG_L25n256_28params.txt")
    theta, x = get_theta_x_SB(luminosity_functions=luminosity_functions, colours=colours)

    # Pick the device before creating anything that lives on it
    available_gpus = setup_gpus(minimum_memory=2)
    primary_device = f'cuda:{available_gpus[0]}' if available_gpus else 'cpu'
    print(f"Using {primary_device} as primary device")

    # Prior is created directly on the primary device
    prior = initialise_priors_SB28_splitGPU(
        df=df_info,
        device=primary_device,
        astro=True,
        dust=False
    )

    # Build each tensor exactly once, directly on the primary device. Each
    # entry of x is flattened with np.hstack before stacking. Building them
    # once avoids a redundant copy and the copy-construct warning that
    # torch.tensor() emits when handed an existing tensor.
    x_all = torch.tensor(
        np.array([np.hstack(_x) for _x in x]),
        dtype=torch.float32,
        device=primary_device,
    )
    theta = torch.tensor(theta, dtype=torch.float32, device=primary_device)

    # Train/test split
    test_mask = create_test_mask()
    train_mask = ~test_mask

    # Training parameters
    train_args = {
        "training_batch_size": 128,
        "learning_rate": 5e-6,
        "stop_after_epochs": 200,
        "validation_fraction": 0.15
    }

    try:
        runner, nets = initialize_training(
            hidden_features=128,
            num_transforms=4,
            train_args=train_args,
            prior=prior,
            model_out_dir=model_out_dir,
            name=name,
            x_all=x_all,
            theta=theta,
            train_mask=train_mask
        )

        # Loader over the training rows only
        loader = NumpyLoader(
            x=x_all[train_mask].clone().detach(),
            theta=theta[train_mask].clone().detach()
        )

        # Train the ensemble
        posterior_ensemble, summaries = runner(loader=loader)

        print("Training completed successfully!")

        return posterior_ensemble, summaries

    except Exception as e:
        # Report, then propagate so the failure is not hidden from the caller
        print(f"An error occurred during training: {str(e)}")
        raise

# Entry point: run training and expose the results as module-level names
# (`posterior_ensemble`, `summaries`) for the cells that follow.
# NOTE(review): later cells also read x_all, theta, test_mask, train_args,
# name, plots_out_dir, df_info and cam, which are locals of main() and are not
# exported here — confirm where those are expected to come from.
if __name__ == "__main__":
    results = main()  # main() returns a (posterior_ensemble, summaries) tuple or raises
    if results is not None:  # Guard before unpacking
        posterior_ensemble, summaries = results  # Bind at module level
        print("Successfully retrieved training results")

        

### Drawing samples from the entire test set to look at overall performance of the model

In [None]:
# Held-out rows used to assess the trained posterior
x_test = x_all[test_mask]
theta_test = theta[test_mask]

# Posterior draws per test observation
n_samples = 1000

# Per-observation draws and their summary statistics
all_samples, all_means, all_stds = [], [], []

for x_obs in x_test:
    # Condition on one observation (reshaped to a single-row batch) and pull
    # the draws back to host memory as a numpy array.
    samples = posterior_ensemble.sample(
        (n_samples,),
        x=x_obs.reshape(1, -1)
    ).cpu().numpy()

    all_samples.append(samples)
    # Mean and standard deviation across the draws
    all_means.append(samples.mean(axis=0))
    all_stds.append(samples.std(axis=0))

# Stack the per-observation lists into arrays
all_samples = np.array(all_samples)
all_means = np.array(all_means)
all_stds = np.array(all_stds)

In [None]:
# Panel titles: columns 1..28 of df_pars (the original comment says column 0
# is the 'name' column).
# NOTE(review): df_pars is not defined in the visible part of the notebook.
param_names = df_pars.columns[1:29].tolist()

fig, axes = plt.subplots(7, 4, figsize=(16, 28))
axes = axes.flatten()

fontsize = 10

# The previous `plt.rcParams['figure.constrained_layout.use'] = True` was
# dropped. It was set after this figure had been created, so it had no effect
# here, but it changed the layout engine of every later figure, where it
# conflicts with subplots_adjust / tight_layout.

# Convert the true parameters to numpy once instead of four times per panel
theta_test_np = theta_test.cpu().numpy()

# One panel per parameter: inferred posterior mean against the true value
for i, ax in enumerate(axes):
    truth = theta_test_np[:, i]
    inferred = all_means[:, i]
    spread = all_stds[:, i]

    # True vs predicted, with the posterior std as the error bar
    ax.errorbar(
        truth,
        inferred,
        yerr=spread,
        fmt='.',
        color='k',
        ecolor='blue',
        capsize=0,
        elinewidth=0.8,
        alpha=0.3,
        markersize=5
    )

    # 1:1 reference line spanning the union of the current x and y ranges
    lims = [
        min(ax.get_xlim()[0], ax.get_ylim()[0]),
        max(ax.get_xlim()[1], ax.get_ylim()[1])
    ]
    ax.plot(lims, lims, '--', color='black', alpha=0.5, linewidth=1)

    # Metrics: RMSE of the posterior mean, squared Pearson correlation, and
    # the mean squared residual in units of the posterior variance
    residual = truth - inferred
    rmse = np.sqrt(np.mean(residual**2))
    r2 = np.corrcoef(truth, inferred)[0, 1]**2
    chi2 = np.mean((residual**2) / (spread**2))

    # Metrics box in the top-left corner
    stats_text = f'RMSE = {rmse:.2f}\n' + \
                 f'R² = {r2:.2f}\n' + \
                 f'χ² = {chi2:.2f}'
    ax.text(0.05, 0.95, stats_text,
            transform=ax.transAxes,
            bbox=dict(facecolor='white', alpha=0.8),
            verticalalignment='top',
            fontsize=fontsize-1)

    ax.set_title(param_names[i], fontsize=fontsize, pad=5)
    ax.set_xlabel('True', fontsize=fontsize-1)
    ax.set_ylabel('Inferred', fontsize=fontsize-1)
    ax.tick_params(axis='both', which='major', labelsize=fontsize-2)

    # Internal padding around the data
    ax.margins(x=0.05, y=0.05)

# Subplot grid placement, as fractions of the figure size. The grid ends at
# 70% of the width and height; the saved file is cropped by
# bbox_inches='tight' below.
plt.subplots_adjust(
    left=0.01,    # left edge of the grid
    right=0.7,    # right edge of the grid
    bottom=0.05,  # bottom edge of the grid
    top=0.7,      # top edge of the grid
    wspace=0.2,   # horizontal gap between panels
    hspace=0.2    # vertical gap between panels
)


# Save the figure.
# NOTE(review): model_params is not defined in the visible part of the notebook.
save_path = f'{plots_out_dir}/posterior_predictions_{model_params}.png'
plt.savefig(save_path, dpi=300, bbox_inches='tight', pad_inches=0.1)
print(save_path)
plt.show()

In [None]:
# Compact description of the training configuration, used in file names.
# NOTE(review): hidden_features and num_transforms are not defined at module
# level in the visible part of the notebook (main() passes 128 and 4 as
# literals) — confirm they are set before this cell runs.
config_str = (f"batch{train_args['training_batch_size']}_"
             f"lr{train_args['learning_rate']}_"
             f"epochs{train_args['stop_after_epochs']}_"
             f"h{hidden_features}_t{num_transforms}")

# Single source of truth for which validation plots are produced. The same
# list drives both the metric and the output file names, so the two cannot
# drift apart. Other options noted by the author: "coverage", "histogram",
# "predictions".
plot_list = ["tarp"]

# Coverage metric.
# NOTE(review): PosteriorCoverage is not imported in the visible part of the
# notebook; presumably it comes from an earlier import or a star import.
metric = PosteriorCoverage(
    num_samples=int(4e3),
    sample_method='direct',
    labels=cam.labels,
    plot_list=plot_list,
    out_dir=plots_out_dir,
)

# Generate the plots on the held-out test rows
figs = metric(
    posterior=posterior_ensemble,
    x=x_all[test_mask].cpu(),
    theta=theta[test_mask, :].cpu(),
    signature=f"coverage_{name}_{config_str}_"  # config goes into the file name
)

config_text = (
    f"Training Config:\n"
    f"Batch Size: {train_args['training_batch_size']}\n"
    f"Learning Rate: {train_args['learning_rate']}\n"
    f"Epochs: {train_args['stop_after_epochs']}\n"
    f"Hidden Features: {hidden_features}\n"
    f"Num Transforms: {num_transforms}"
)

# Annotate and save each returned figure under its plot type
for plot_type, fig in zip(plot_list, figs):
    plt.figure(fig.number)  # make this figure current
    plt.figtext(0.02, 0.98, config_text,
                fontsize=8,
                bbox=dict(facecolor='white', alpha=0.8, edgecolor='gray'),
                verticalalignment='top')

    plt.savefig(os.path.join(plots_out_dir,
                f'metric_{plot_type}_{name}_{config_str}.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    # Close this specific figure rather than whichever one is current
    plt.close(fig)

### Looking at a specific case (one observation chosen at random with a fixed seed)

In [None]:
# 1. Print the signature strings carried by the returned posterior object
print(posterior_ensemble.signatures)


# 2. Pick one observation at random, with a fixed seed for reproducibility.
# NOTE(review): x_train and device are not defined at module level in the
# visible part of the notebook — confirm where they come from.
seed_in = 49
np.random.seed(seed_in)
ind = np.random.randint(len(x_train[0]))

# 3. Draw 1000 posterior samples for that observation, again with a fixed seed
seed_samp = 32
torch.manual_seed(seed_samp)
x_obs = torch.Tensor(x_train[0][ind]).to(device)  # the chosen observation from 2.
samples = posterior_ensemble.sample((1000,), x_obs)

# 4. Evaluate the learned posterior's log-probability at every drawn sample,
#    conditioned on the same observation
log_prob = posterior_ensemble.log_prob(samples, x_obs)

# Move both results to host memory as numpy arrays for plotting
samples = samples.cpu().numpy()
log_prob = log_prob.cpu().numpy()

from matplotlib.gridspec import GridSpec
def plot_posterior_samples_grid(samples, log_prob, param_names, df_info, model_name, train_args):
    """Plot one histogram of posterior samples per parameter on a 4-column grid.

    Each panel shows a 50-bin density histogram of ``samples[:, i]``, vertical
    lines at the parameter's ``MinVal``, ``MaxVal`` and ``FiducialVal`` from
    ``df_info``, and a text box with the mean and standard deviation. For
    parameters whose ``LogFlag`` equals 1, the x-axis is logarithmic and the
    statistics are computed on ``log10`` of the samples.

    Args:
        samples: 2-D array indexed as ``samples[:, i]`` for parameter ``i``.
        log_prob: Accepted for interface compatibility; not used here.
        param_names: Parameter names; each must match a ``ParamName`` row in
            ``df_info``.
        df_info: Table with columns ``ParamName``, ``LogFlag``, ``MinVal``,
            ``MaxVal``, ``FiducialVal`` and ``Description``.
        model_name: Name shown in the configuration box.
        train_args: Mapping with ``training_batch_size``, ``learning_rate``
            and ``stop_after_epochs``.

    Returns:
        The created matplotlib figure.

    Note:
        ``hidden_features`` and ``num_transforms`` are read from module-level
        globals, not from the arguments.
    """
    n_cols = 4
    n_params = len(param_names)
    n_rows = -(-n_params // n_cols)  # ceiling division

    fig = plt.figure(figsize=(20, 5*n_rows))
    fig.suptitle('Posterior Probability Distributions', fontsize=16, y=0.98)
    gs = GridSpec(n_rows, n_cols, figure=fig)

    # Configuration box in the top-left corner of the figure
    model_info = (
        f"Model Config:\n"
        f"Name: {model_name}\n"
        f"Hidden Features: {hidden_features}\n"
        f"Num Transforms: {num_transforms}\n"
        f"\nTraining Args:\n"
        f"Batch Size: {train_args['training_batch_size']}\n"
        f"Learning Rate: {train_args['learning_rate']}\n"
        f"Stop After Epochs: {train_args['stop_after_epochs']}"
    )
    fig.text(0.02, 0.96, model_info,
             fontsize=8,
             bbox=dict(facecolor='white', alpha=0.8, edgecolor='gray'),
             verticalalignment='top')

    # (df_info column, colour, line style, legend label) for the reference lines
    reference_lines = (
        ('MinVal', 'g', ':', 'Min'),
        ('MaxVal', 'g', ':', 'Max'),
        ('FiducialVal', 'r', '--', 'Fiducial'),
    )

    for i, name in enumerate(param_names):
        row, col = divmod(i, n_cols)
        ax = fig.add_subplot(gs[row, col])

        data = samples[:, i]
        param_info = df_info[df_info['ParamName'] == name].iloc[0]
        is_log = param_info['LogFlag'] == 1

        # The histogram is the same in both cases; only axis scale, statistics
        # and labels differ.
        ax.hist(data, bins=50, density=True, alpha=0.6)
        if is_log:
            ax.set_xscale('log')
            log_data = np.log10(data)
            mean = np.mean(log_data)
            std = np.std(log_data)
            stats_text = f'Log10 Mean: {mean:.3f}\nLog10 Std: {std:.3f}'
            ax.set_xlabel('Parameter Value (log scale)', fontsize=8)
        else:
            mean = np.mean(data)
            std = np.std(data)
            stats_text = f'Mean: {mean:.3f}\nStd: {std:.3f}'
            ax.set_xlabel('Parameter Value', fontsize=8)

        ax.set_ylabel('Density', fontsize=8)

        for column, colour, style, label in reference_lines:
            ax.axvline(param_info[column], color=colour, linestyle=style, alpha=0.5, label=label)

        ax.text(0.02, 0.95, stats_text, transform=ax.transAxes,
                verticalalignment='top', fontsize=8,
                bbox=dict(facecolor='white', alpha=0.8))

        ax.set_title(f"{name}\n{param_info['Description']}", fontsize=8, pad=5)
        ax.tick_params(labelsize=8)

        # A single legend on the first panel is enough
        if i == 0:
            ax.legend(loc='upper right', fontsize=8)

    plt.tight_layout()
    plt.subplots_adjust(top=0.93)  # leave room for the main title
    return fig

# Parameter names come straight from the ParamName column of df_info
param_names = df_info['ParamName'].tolist()

# Draw the per-parameter posterior histograms for the chosen observation
fig = plot_posterior_samples_grid(
    samples,
    log_prob,
    param_names,
    df_info,
    model_name=name,
    train_args=train_args,
)

# File name encodes the run name and the model/training configuration
save_name = (f'parameter_posteriors_grid_{name}_'
            f'h{hidden_features}_t{num_transforms}_'
            f'b{train_args["training_batch_size"]}_'
            f'e{train_args["stop_after_epochs"]}.png')

# Make sure the output directory exists before writing the file
os.makedirs(plots_out_dir, exist_ok=True)
grid_path = os.path.join(plots_out_dir, save_name)
plt.savefig(grid_path, dpi=300, bbox_inches='tight')