# Inference Example

In this notebook, we take a quick look at how to load real and simulated data, and train a given machine learning model.

In [None]:
import spt
import spt.config as cfg

import random
import numpy as np

Because we're running in a notebook, we'll change directory to the root of the SPItorch project so that the example file paths work on any machine. We'll also take care of some one-time setup (choosing the tensor dtype and the compute device):

In [None]:
try: # One-time setup
    assert(_SETUP)
except NameError:
    import os
    import torch as t
    os.chdir(os.path.split(spt.__path__[0])[0])
    dtype = t.float32
    device = t.device("cuda") if t.cuda.is_available() else t.device("cpu")
    if device == t.device("cuda"):
        print(f'Using GPU for training')
        !nvidia-smi -L
    else:
        print("CUDA is unavailable; training on CPU.")
    _SETUP = True

## Offline Model Training

Let's begin by loading the _inference parameters_ from the configuration file, and inspecting the contents:

In [None]:
# Load the inference configuration object from the project's config module.
ip = cfg.InferenceParams()
# ip.dataset_loc = './data/dsets/dev'
# Overrides for this example. The exact effect of each flag is defined in
# spt.config / the training code — TODO confirm before relying on them.
ip.use_existing_checkpoints = False
ip.retrain_model = False
# Print the resulting settings so they are recorded in the notebook output.
print(ip)

Let's initialise a model (in this example, a _sequential autoregressive network_):

In [None]:
# Hyperparameters for the sequential autoregressive network (SAN).
mp = cfg.SANParams()
# ip.model is called with those hyperparameters to build the model instance.
model = ip.model(mp)

We can now create some data loaders (one for training, another with test data).

In [None]:
from spt.load_photometry import load_simulated_data, get_norm_theta

fp = cfg.ForwardModelParams()


def _log_photometry(x):
    """Return the natural log of a photometry array as a torch tensor."""
    return t.from_numpy(np.log(x))


# One transform list per side of each (photometry, theta) pair:
#   photometry -> log-space tensor; theta -> normalised using the forward model params.
train_loader, test_loader = load_simulated_data(
    path=ip.dataset_loc,
    split_ratio=ip.split_ratio,
    batch_size=model.params.batch_size,
    test_batch_size=1,
    phot_transforms=[_log_photometry],
    theta_transforms=[get_norm_theta(fp)],
)

In [None]:
# Train the model on the simulated training loader, using the inference
# parameters configured earlier in this notebook.
model.offline_train(train_loader, ip)

In [None]:
# Dataset backing the test loader. The sampling helpers in this cell read its
# `.indices` attribute — presumably it is a torch `Subset` of the full
# simulated dataset; TODO confirm.
tds = test_loader.dataset
def new_sample() -> tuple[int, int, tuple[t.Tensor, t.Tensor]]:
    """Pick a uniformly random item from the test dataset ``tds``.

    Returns:
        ``(ds_idx, idx, xys)``: ``ds_idx`` is the position within ``tds``,
        ``idx`` is ``tds.indices[ds_idx]``, and ``xys`` is the item
        ``tds[ds_idx]``.
    """
    position = random.randint(0, len(tds) - 1)  # randint is inclusive at both ends
    item = tds[position]
    return position, tds.indices[position], item
def sample_at(ds_idx: int, dataset=None) -> tuple[int, tuple[t.Tensor, t.Tensor]]:
    """Fetch the item at a fixed position (reproducible counterpart to ``new_sample``).

    Args:
        ds_idx: position within the dataset.
        dataset: any object supporting ``dataset[i]`` and ``dataset.indices[i]``.
            Defaults to the notebook-level test dataset ``tds``.

    Returns:
        ``(idx, xys)``: ``idx`` is ``dataset.indices[ds_idx]`` and ``xys`` is
        the item ``dataset[ds_idx]`` (unpacked by callers as an ``(x, y)`` pair).
    """
    if dataset is None:
        # Resolved at call time so the helper follows whatever `tds` currently is.
        dataset = tds
    xys = dataset[ds_idx]
    idx = dataset.indices[ds_idx]
    return idx, xys

# OLD:
# sim_xs, sim_ys = spt.utils.new_sample(test_loader, 1)

In [None]:
from spt.visualisation import plot_corner

In [None]:
# NOTE: this should be done automatically at the end of offline_train...
# Until it is, call eval() by hand before sampling from the trained model.
model.eval()

We can now create a corner plot with this data:

In [None]:
# ds_idx, sim_idx, (sim_xs, sim_ys) = new_sample()

# for reproducibility
ds_idx = 947898
sim_idx, (sim_xs, sim_ys) = sample_at(ds_idx)

with t.inference_mode():
    # Feed the model an input on its own device/dtype, exactly as is done for
    # the real observation later in the notebook. `sim_xs` itself is left
    # untouched (on the CPU) because later cells call `.numpy()` on it.
    samples = model.sample(sim_xs.to(device, dtype), n_samples=10000).cpu()

# Corner plot of the drawn samples, with the simulated sample's true
# parameters overlaid for comparison.
plot_corner(samples=samples.squeeze().numpy(), true_params=sim_ys,
            title='Sequential Autoregressive Network',
            description=str(model))

In [None]:
from spt.utils import get_median_mode
# Point estimates (median and mode) computed from the samples drawn above.
median, mode = get_median_mode(samples)

In [None]:
fmp = spt.config.ForwardModelParams()
# get_denorm_theta returns a callable; presumably it inverts the
# get_norm_theta transform applied when the loaders were built — TODO confirm.
dt = spt.load_photometry.get_denorm_theta(fmp)
# [None, :] adds a leading axis before the call; squeeze() drops it afterwards.
denorm_mode = dt(mode[None,:]).squeeze()
denorm_median = dt(median[None,:]).squeeze()

# Verify Predictions against MCMC on simulated sample

In [None]:
# Filters taken from the forward model configuration.
fs = cfg.ForwardModelParams().filters
# np.exp undoes the np.log photometry transform applied when the loaders were built.
phot_obs = np.exp(sim_xs.numpy())  # de-normalise the (simulated) model input
# Build an observation from the simulated photometry, tagged with the
# sample's index and the dataset location it came from.
obs = spt.load_photometry.sim_observation(fs, phot_obs, index=sim_idx, dset=ip.dataset_loc)

In [None]:
# Prospector object built from the simulated observation; used below for
# plotting and for the MCMC fits.
p = spt.Prospector(obs)

In [None]:
# Display the observation inline (show=True) without writing a file (save=False).
p.visualise_obs(show=True, save=False, title="Simulated Observation")

## Forward Model using SAN Predictions

In [None]:
# Plot the forward model at the de-normalised median of the network's samples.
p.visualise_model(denorm_median, show=True, save=False, title="Forward Model with Network Predictions")

## Forward model with MCMC parameter predictions

In [None]:
# Run the EMCEE fit with default parameters.
# NOTE(review): always_fit=False presumably reuses previously saved results
# when they exist instead of re-fitting — confirm against Prospector.emcee_fit.
p.emcee_fit(always_fit=False)

In [None]:
# No theta is passed here; presumably the parameters from the preceding
# EMCEE fit are used — TODO confirm.
p.visualise_model(show=True, save=False, title="Forward Model with EMCEE Results")

## Initialising MCMC at model predictions

Can we speed up the MCMC procedure by initialising the parameters at the machine learning model's outputs?

In [None]:
# Seed Prospector's parameters with the network's de-normalised median
# prediction, so that the next fit starts from the network's estimate.
p.set_theta(denorm_median)

In [None]:
ep = cfg.EMCEEParams()
# NOTE(review): presumably this disables the optimisation step that would
# otherwise run before sampling, so the theta set above is kept as the
# starting point — confirm.
ep.optimise = False
# always_fit=True: presumably forces a fresh fit rather than reusing saved
# results — TODO confirm.
p.emcee_fit(ep, always_fit=True)

In [None]:
# No theta is passed; presumably the result of the SAN-initialised fit just
# performed is plotted — TODO confirm.
p.visualise_model(show=True, save=False, title="EMCEE Params with SAN Initialisation")

## Other MCMC plots

In [None]:
import prospect.io.read_results as reader
# from spt.types import MCMCMethod
# p.load_fit_results(index=obs['idx'], method=MCMCMethod.EMCEE, survey=obs['survey'])
# NOTE(review): the results file below is hard-coded, so it is not tied to the
# sample fitted in the cells above and must already exist on disk. The
# commented-out call looks the results up from `obs` instead — confirm which
# is intended.
p.load_fit_results(file='./results/mcmc/emcee_samples/photometry_sim_10000000_5942041.h5')

In [None]:
# Trace a random subset of walkers. The subset size is capped at the number of
# walkers available, because choice(..., replace=False) raises a ValueError
# when asked for more items than exist.
n_walkers = p.fit_results["run_params"]["nwalkers"]
chosen = np.random.choice(n_walkers, size=min(10, n_walkers), replace=False)
tracefig = reader.traceplot(p.fit_results, figsize=(20, 10), chains=chosen)

In [None]:
import matplotlib.pyplot as plt

# Locate the highest-probability sample: argmax works on the flattened
# log-probability array, so unravel it back into indices into the chain.
imax = np.argmax(p.fit_results['lnprobability'])
i, j = np.unravel_index(imax, p.fit_results['lnprobability'].shape)
theta_max = p.fit_results['chain'][i, j, :].copy()
thin = 5

# Only the MAP sample is available in this cell; no separate optimisation
# result is loaded, so it is not reported under an "Optimization" label.
print(f'MAP value: {theta_max}')

# One row/column of axes per parameter rather than a hard-coded 6x6 grid.
ndim = theta_max.shape[0]
cornerfig = reader.subcorner(p.fit_results, start=0, thin=thin, truths=theta_max,
                             fig=plt.subplots(ndim, ndim, figsize=(27, 27), dpi=100)[0])

# Real Observations

Now we attempt to run the trained model on a real observation from a catalogue.

In [None]:
# Load a real catalogue observation and remember its identifier.
real_obs = spt.load_observation()
obs_idx = real_obs['idx']

# Columns holding the flux for each filter of the forward model.
required_cols = [band.maggie_col for band in fp.filters]

# Same preprocessing as the simulated photometry: take the log, then move the
# result to the model's device and dtype.
maggies = real_obs[required_cols].values.astype(np.float64)
real_xs = t.log(t.tensor(maggies)).to(device, dtype)

In [None]:
# Draw samples for the real observation without tracking gradients.
with t.inference_mode():
    real_samples = model.sample(real_xs, n_samples=10_000).cpu()

# There are no ground-truth parameters for a real observation, hence true_params=None.
real_corner_data = real_samples.squeeze().numpy()
plot_corner(
    samples=real_corner_data,
    true_params=None,
    title='Sequential Autoregressive Network',
    description=str(model),
)

In [None]:
# Point estimates (median and mode) computed from the real-observation samples.
real_median, real_mode = get_median_mode(real_samples)

In [None]:
# Same de-normalisation steps as were applied to the simulated-sample estimates.
fmp = spt.config.ForwardModelParams()
dt = spt.load_photometry.get_denorm_theta(fmp)
# [None, :] adds a leading axis before the call; squeeze() drops it afterwards.
real_denorm_mode = dt(real_mode[None,:]).squeeze()
real_denorm_median = dt(real_median[None,:]).squeeze()

## Comparison to Prospector Fitting

In [None]:
# Prospector object built from the real observation, kept separate from the
# simulated-observation instance `p`.
real_p = spt.Prospector(real_obs)

In [None]:
# The title identifies the observation as "<survey>:<integer idx>".
real_p.visualise_obs(show=True, save=False, title=f'Real Observation ({real_obs["survey"]}:{int(real_obs["idx"])})')

In [None]:
# NOTE(review): this plots the posterior *mode*, whereas the simulated-data
# section plotted the *median* (denorm_median) — confirm the difference is intended.
real_p.visualise_model(real_denorm_mode, show=True, save=False, title=f'SAN Parameter Predictions ({real_obs["survey"]}:{int(real_obs["idx"])})')

## Perform MCMC sampling

In [None]:
# Run the EMCEE fit on the real observation with default parameters.
# NOTE(review): always_fit=False presumably reuses saved results when
# available — confirm against Prospector.emcee_fit.
real_p.emcee_fit(always_fit=False)

In [None]:
# No theta is passed; presumably the parameters from the preceding EMCEE fit
# are plotted — TODO confirm.
real_p.visualise_model(show=True, save=False, title=f'EMCEE Results ({real_obs["survey"]}:{int(real_obs["idx"])})')

### 