# COSMOS LePhare example

The default data for testing LePHARE is the COSMOS dataset.

In this example we use RAIL to run the standard LePHARE COSMOS example.

In this example we use fluxes, not magnitudes. To use magnitudes instead you must update both the config and the input values.

In [None]:
from rail.estimation.algos.lephare import LephareInformer, LephareEstimator
import numpy as np
import lephare as lp
from rail.core.stage import RailStage
import matplotlib.pyplot as plt
from astropy.table import Table
import astropy.units as u
from collections import OrderedDict
from rail.core.data import TableHandle

import os

# Take the data store held on the RailStage class and switch on the
# `allow_overwrite` flag of its class.
DS = RailStage.data_store
type(DS).allow_overwrite = True

Here we load the default COSMOS configuration for LePHARE.

Retrieve all the required filter and template files

In [None]:
# Work on a copy so the edits below do not alter lephare's module-level
# default configuration for anything else running in this session.
lephare_config = lp.default_cosmos_config.copy()
# Coarse redshift grid to keep the example fast. For usable science results
# you must use a denser grid: comment out the following line, which reverts
# to the config dz of 0.01.
lephare_config["Z_STEP"] = ".1,0.,7."
# Number of objects to run on. Increase to run on more; set to -1 for all.
nobj = 100

# Retrieve the auxiliary files required by the config, plus the example
# input catalogue and output parameter file.
lp.data_retrieval.get_auxiliary_data(
    keymap=lephare_config,
    additional_files=["examples/COSMOS.in", "examples/output.para"],
)


In [None]:
# FILTER_LIST is stored as one comma-separated string; split it into a list
# with one entry per band.
filter_list = lephare_config["FILTER_LIST"]
bands = filter_list.split(",")
# Show how many bands the configuration lists.
len(bands)

In [None]:
# For a quick test only keep the first `nobj` objects. A negative `nobj`
# means "all objects": it is mapped to None because slicing with [:-1]
# would silently drop the last row of the catalogue.
n_rows = nobj if nobj >= 0 else None
cosmos = Table.read(
    os.path.join(lp.LEPHAREDIR, "examples/COSMOS.in"), format="ascii"
)[:n_rows]

In [None]:
# Report how many catalogue rows were kept after slicing.
print(f"We will run on {len(cosmos)} objects.")

In [None]:
# The example file is in the historical lephare format: after the first
# column, the columns come in (flux, flux error) pairs, one pair per band,
# and the second-to-last column is used as the redshift.
flux_cols = [f"flux_{band}" for band in bands]
flux_err_cols = [f"flux_err_{band}" for band in bands]

column_names = cosmos.colnames
data = OrderedDict()
for pair_idx, (flux_name, err_name) in enumerate(zip(flux_cols, flux_err_cols)):
    # Pair k sits at columns 2k+1 (flux) and 2k+2 (error).
    data[flux_name] = cosmos[column_names[2 * pair_idx + 1]]
    data[err_name] = cosmos[column_names[2 * pair_idx + 2]]
data["redshift"] = np.array(cosmos[column_names[-2]])

In [None]:
# Add the table to the data store under the name "tomo_bin" as a TableHandle.
# The returned object is what gets passed to the inform and estimate stages.
data_io = DS.add_data("tomo_bin", data, TableHandle)

We use the inform stage to create the library of SEDs with various redshifts, extinction parameters, and reddening values.

In [None]:
# Gather the informer settings in one place so they are easy to inspect.
inform_options = dict(
    name="inform_COSMOS",
    nondetect_val=np.nan,
    model="lephare.pkl",
    hdf5_groupname="",
    lephare_config=lephare_config,
    bands=flux_cols,
    err_bands=flux_err_cols,
    # The first flux column is used as the reference band.
    ref_band=flux_cols[0],
)
inform_lephare = LephareInformer.make_stage(**inform_options)

# Run the inform step on the catalogue handle.
inform_lephare.inform(data_io)

Now we take the COSMOS test data and find the best fits from the library. This results in a PDF, zmode, and zmean for each input object.

In [None]:
# Gather the estimator settings; the model is taken from the informer's
# "model" handle and the band lists match those given to the informer.
estimate_options = dict(
    name="test_Lephare_COSMOS",
    nondetect_val=np.nan,
    model=inform_lephare.get_handle("model"),
    hdf5_groupname="",
    aliases=dict(input="test_data", output="lephare_estim"),
    bands=flux_cols,
    err_bands=flux_err_cols,
    ref_band=flux_cols[0],
)
estimate_lephare = LephareEstimator.make_stage(**estimate_options)

# Run the estimation on the same catalogue handle and keep the result.
lephare_estimated = estimate_lephare.estimate(data_io)

In [None]:
# Display the AUTO_ADAPT entry of the configuration.
lephare_config["AUTO_ADAPT"]

An example lephare PDF and comparison to the true value

In [None]:
# Object to inspect and the redshift grid on which its PDF is evaluated.
indx = 1
zgrid = np.linspace(0, 7, 1000)

# Evaluate the PDF on the grid (dropping singleton axes) and look up the
# catalogue redshift of the same object.
pdf_on_grid = np.squeeze(lephare_estimated.data[indx].pdf(zgrid))
true_z = data_io.data["redshift"][indx]

plt.plot(zgrid, pdf_on_grid, label="Estimated PDF")
plt.axvline(x=true_z, color="r", label="True redshift")
plt.xlabel("z")
plt.legend()
plt.show()

More example fits

In [None]:
# Objects to show, one per panel of the 2x4 grid.
indxs = [8, 16, 32, 64, 65, 66, 68, 69]
zgrid = np.linspace(0, 7, 1000)

# Skip indices beyond the end of the catalogue so that running on fewer
# objects leaves panels empty instead of raising an IndexError.
n_available = len(data_io.data["redshift"])
shown = [i for i in indxs if i < n_available]

fig, axs = plt.subplots(2, 4, figsize=(20, 6))
for ax, indx in zip(axs.flat, shown):
    ax.plot(
        zgrid,
        np.squeeze(lephare_estimated.data[indx].pdf(zgrid)),
        label="Estimated PDF",
    )
    ax.axvline(x=data_io.data["redshift"][indx], color="r", label="True redshift")
    ax.set_xlabel("z")

# Every panel uses the same two labels, so one legend is enough.
if shown:
    axs[0, 0].legend()
plt.show()
    ax.set_xlabel('z')

Histogram of the absolute difference between lephare estimate and true redshift

In [None]:
# Flatten both arrays before subtracting so the difference is always
# element-wise: if the ancil array carries a trailing length-1 axis,
# (N, 1) - (N,) would broadcast to an (N, N) matrix instead.
zmode = np.ravel(lephare_estimated.data.ancil["zmode"])
z_true = np.ravel(data["redshift"])
estimate_diff_from_truth = np.abs(zmode - z_true)

plt.figure()
plt.hist(estimate_diff_from_truth, bins=100)
plt.xlabel("abs(z_estimated - z_true)")
plt.show()

In [None]:
# Catalogue redshift against the Z_BEST value stored in the ancil data.
z_spec = data["redshift"]
z_best = lephare_estimated.data.ancil["Z_BEST"]
plt.scatter(z_spec, z_best)
plt.xlabel("$z_{spec}$")
plt.ylabel("$z_{LePHARE}$")

In [None]:
# Catalogue redshift against the zmean value stored in the ancil data.
z_spec = data["redshift"]
z_mean = lephare_estimated.data.ancil["zmean"]
plt.scatter(z_spec, z_mean)
plt.xlabel("$z_{spec}$")
plt.ylabel("$z_{LePHARE}$")