Small demo notebook for team meeting (#26)

* Putting together small demo notebook for team meeting. * Add a zgrid definition. * Fix file path.
LSSTDESC · May 6, 2024 · ab0394a · ab0394a
1 parent 6b0a9ae
commit ab0394a
Show file tree

Hide file tree

Showing 5 changed files with 209 additions and 5 deletions.
diff --git a/examples/data/output_table_conv_test.hdf5 b/examples/data/output_table_conv_test.hdf5
diff --git a/examples/data/output_table_conv_train.hdf5 b/examples/data/output_table_conv_train.hdf5
diff --git a/examples/small_demo.ipynb b/examples/small_demo.ipynb
@@ -0,0 +1,202 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This example notebook uses synthetic data produced by PZFlow in combination with several predefined SED templates and filter definition files."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rail.estimation.algos.lephare import LephareInformer, LephareEstimator\n",
+    "import numpy as np\n",
+    "import lephare as lp\n",
+    "from rail.core.stage import RailStage\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "DS = RailStage.data_store\n",
+    "DS.__class__.allow_overwrite = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we load previously created synthetic data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tables_io\n",
+    "\n",
+    "trainFile = './data/output_table_conv_train.hdf5'\n",
+    "testFile = './data/output_table_conv_test.hdf5'\n",
+    "\n",
+    "traindata_io = tables_io.read(trainFile)\n",
+    "testdata_io = tables_io.read(testFile)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Retrieve all the required filter and template files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lephare_config_file = \"../src/rail/estimation/algos/lsst.para\"\n",
+    "lephare_config = lp.read_config(lephare_config_file)\n",
+    "\n",
+    "lp.data_retrieval.get_auxiliary_data(keymap=lephare_config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use the inform stage to create the library of SEDs with various redshifts, extinction parameters, and reddening values. This typically takes ~3-4 minutes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inform_lephare = LephareInformer.make_stage(\n",
+    "    name=\"inform_Lephare\",\n",
+    "    nondetect_val=np.nan,\n",
+    "    model=\"lephare.pkl\",\n",
+    "    hdf5_groupname=\"\",\n",
+    ")\n",
+    "\n",
+    "inform_lephare.inform(traindata_io)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we take the sythetic test data, and find the best fits from the library. This results in a PDF, zmode, and zmean for each input test data. Takes ~2 minutes to run on 1500 inputs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "estimate_lephare = LephareEstimator.make_stage(\n",
+    "    name=\"test_Lephare\",\n",
+    "    nondetect_val=np.nan,\n",
+    "    model=inform_lephare.get_handle(\"model\"),\n",
+    "    hdf5_groupname=\"\",\n",
+    "    aliases=dict(input=\"test_data\", output=\"lephare_estim\"),\n",
+    ")\n",
+    "\n",
+    "lephare_estimated = estimate_lephare.estimate(testdata_io)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An example lephare PDF and comparison to the true value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "indx = 0\n",
+    "zgrid = np.linspace(0,3,300)\n",
+    "plt.plot(zgrid, np.squeeze(lephare_estimated.data[indx].pdf(zgrid)), label='Estimated PDF')\n",
+    "plt.axvline(x=testdata_io['redshift'][indx], color='r', label='True redshift')\n",
+    "plt.legend()\n",
+    "plt.xlabel('z')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "More example fits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "indxs = [8, 16, 32, 64, 128, 256, 512, 1024]\n",
+    "zgrid = np.linspace(0,3,300)\n",
+    "fig, axs = plt.subplots(2,4, figsize=(20,6))\n",
+    "for i, indx in enumerate(indxs):\n",
+    "    ax = axs[i//4, i%4]\n",
+    "    ax.plot(zgrid, np.squeeze(lephare_estimated.data[indx].pdf(zgrid)), label='Estimated PDF')\n",
+    "    ax.axvline(x=testdata_io['redshift'][indx], color='r', label='True redshift')\n",
+    "    ax.set_xlabel('z')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Histogram of the absolute difference between lephare estimate and true redshift"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "estimate_diff_from_truth = np.abs(lephare_estimated.data.ancil['zmode'] - testdata_io['redshift'])\n",
+    "\n",
+    "plt.figure()\n",
+    "plt.hist(estimate_diff_from_truth, 100)\n",
+    "plt.xlabel('abs(z_estimated - z_true)')\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "new_rail",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/rail/estimation/algos/lephare.py b/src/rail/estimation/algos/lephare.py
@@ -32,7 +32,7 @@ class LephareInformer(CatInformer):
         ),
         star_sed=Param(
             str,
-            "$LEPHAREDIR/examples/STAR_MOD_ALL.list",
+            "$LEPHAREDIR/sed/STAR/STAR_MOD_ALL.list",
             msg="Path to text file containing list of star SED templates",
         ),
         qso_sed=Param(
@@ -147,6 +147,8 @@ def run(self):
         # Get number of sources
         ngal = len(training_data[self.config.ref_band])
 
+        lp.data_retrieval.get_auxiliary_data(keymap=self.lephare_config)
+
         # The three main lephare specific inform tasks
         self._create_filter_library()
         self._create_sed_library()

diff --git a/src/rail/estimation/algos/lsst.para b/src/rail/estimation/algos/lsst.para
@@ -4,17 +4,17 @@
 ##############################################################################################
 #
 #-------------------    STELLAR LIBRARY (ASCII SEDs)        ---------------------------       
-STAR_SED        STAR_SWIRE.list         # STAR list (full path)
+STAR_SED        sed/STAR/STAR_MOD_ALL.list         # STAR list (full path)
 STAR_LIB	LSST_STAR_BIN 		# Binary STAR LIBRARY (-> $ZPHOTWORK/lib_bin/*)
 STAR_FSCALE	3.432E-09		# Arbitrary Flux Scale 
 #
 #-------------------    QSO LIBRARY (ASCII SEDs)            ---------------------------  
-QSO_SED		AGN_LONSDALE.list       # QSO list (full path)
+QSO_SED		sed/QSO/SALVATO09/AGN_MOD.list       # QSO list (full path)
 QSO_LIB		LSST_QSO_BIN 			# Binary QSO LIBRARY (-> $ZPHOTWORK/lib_bin/*)
 QSO_FSCALE	1.			# Arbitrary Flux Scale 
 #
 #-------------------  GALAXY LIBRARY (ASCII or BINARY SEDs) ---------------------------
-GAL_SED		 CE_MOD.list            # GALAXMuzzin09_SEDY list (full path)
+GAL_SED		 examples/COSMOS_MOD.list            # GALAXMuzzin09_SEDY list (full path)
 GAL_LIB	         LSST_GAL_BIN 	                # Binary GAL LIBRARY (-> $ZPHOTWORK/lib_bin/*)
 GAL_FSCALE      1.                      # Arbitrary Flux Scale
 #SEL_AGE 	/data/zphot_vers25_03_03/sed/GAL/HYPERZ/AGE_GISSEL_HZ.dat	# List of Age for GISSEL(full path)
@@ -64,7 +64,7 @@ ZGRID_TYPE      0               # Define the kind of redshift grid (0: linear ;
 Z_STEP 		0.01,0.,7. 	# dz, zmin, zmax 
 COSMOLOGY	70,0.3,0.7	# H0,om0,lbd0    (if lb0>0->om0+lbd0=1)
 MOD_EXTINC 	0,0		# model range for extinction 
-EXTINC_LAW	SB_calzetti.dat	# ext. law (in  $ZPHOTDIR/ext/*)
+EXTINC_LAW	SMC_prevot.dat,SB_calzetti.dat,SB_calzetti_bump1.dat,SB_calzetti_bump2.dat	# ext. law (in  $ZPHOTDIR/ext/*)
 EB_V            0.,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.5 # E(B-V) (<50 values)
 EM_LINES        EMP_UV          # [NO/EMP_UV/EMP_SFR/PHYS] choice between emission line prescription  
 EM_DISPERSION   0.5,0.75,1.,1.5,2. # Dispersion allowed in the emission line flux factor