From 96fb1bbdbc1c06ec3d23c454e196b83ec3f53c99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brigitta=20Sip=C5=91cz?= <bsipocz@gmail.com>
Date: Tue, 23 Jul 2024 15:42:05 -0700
Subject: [PATCH 1/5] Removing parquet cosmosim notebook

---
 conf.py                                 |   4 +-
 index.md                                |   1 +
 tutorials/cosmosims/CosmoDC2_Parquet.md | 250 ------------------------
 3 files changed, 2 insertions(+), 253 deletions(-)
 delete mode 100644 tutorials/cosmosims/CosmoDC2_Parquet.md

diff --git a/conf.py b/conf.py
index 4ba368c1..9218b8bd 100644
--- a/conf.py
+++ b/conf.py
@@ -1,5 +1,3 @@
-
-
 # Configuration file for the Sphinx documentation builder.
 #
 # -- Path setup --------------------------------------------------------------
@@ -40,7 +38,7 @@
 
 # MyST-NB configuration
 nb_execution_timeout = 900
-nb_execution_excludepatterns = ['CosmoDC2_Parquet.md', 'Parallelize_Convolution.md']
+nb_execution_excludepatterns = ['Parallelize_Convolution.md']
 
 
 # -- Options for HTML output -------------------------------------------------
diff --git a/index.md b/index.md
index 072df7fa..0181cdde 100644
--- a/index.md
+++ b/index.md
@@ -87,6 +87,7 @@ maxdepth: 1
 
 tutorials/openuniversesims/openuniverse2024_roman_simulated_timedomainsurvey
 tutorials/openuniversesims/openuniverse2024_roman_simulated_wideareasurvey
+
 ```
 
 ## Generally useful techniques
diff --git a/tutorials/cosmosims/CosmoDC2_Parquet.md b/tutorials/cosmosims/CosmoDC2_Parquet.md
deleted file mode 100644
index 2290715b..00000000
--- a/tutorials/cosmosims/CosmoDC2_Parquet.md
+++ /dev/null
@@ -1,250 +0,0 @@
----
-jupytext:
-  text_representation:
-    extension: .md
-    format_name: myst
-    format_version: 0.13
-    jupytext_version: 1.16.2
-kernelspec:
-  display_name: Python 3
-  language: python
-  name: python3
----
-
-# Working with CosmoDC2 Parquet files
-
-This notebook shows how to work with the bulk-downloadable CosmoDC2 files in Parquet format. Goals include:
-
-* how to read in a single file
-* how to plot galaxy properties
-* how to read in selected columns
-* how to read from all of the files
-
-+++
-
-### Imports
-
-The pyarrow package is the underlying engine for reading these files
-
-```{code-cell} ipython3
-import os
-
-from astropy.table import Table
-import dask.dataframe as dd
-import numpy as np
-import matplotlib.colors as clr
-import matplotlib.pyplot as plt
-import pandas as pd
-import pyarrow.parquet as pq
-```
-
-```{code-cell} ipython3
-%matplotlib inline
-```
-
-### Set data location
-
-+++
-
-Modify the `inpath` to your location for the Parquet files.
-
-```{code-cell} ipython3
-inpath = '/stage/irsa-data-download10/parquet-work/CosmoDC2'
-```
-
-Select a single file for the first demonstrations. The files are numbered by HEALPix number with NSIDE=32. The example here is near the center of the 440 sq deg covered by the simulations.
-
-```{code-cell} ipython3
-infile = 'cosmoDC2_healpix09560.parquet'
-```
-
-```{code-cell} ipython3
-joined = os.path.join(inpath, infile)
-```
-
-### Get all column names and types
-
-+++
-
-Read a table of zero length to get the column names.
-
-```{code-cell} ipython3
-schema = Table.read(joined, schema_only=True)
-```
-
-The column names are in case-sensitive alphabetical order here.
-
-```{code-cell} ipython3
-schema
-```
-
-Alternatively, use the pyarrow package directly.
-
-```{code-cell} ipython3
-schema = pq.read_schema(os.path.join(inpath, infile))
-```
-
-```{code-cell} ipython3
-len(schema)
-```
-
-Capitalized magnitudes are absolute magnitudes. Descriptions of the columns are not included in the Parquet files, but are in a separate file (to be specified).
-
-```{code-cell} ipython3
-print(schema)
-```
-
-### Read only the redshifts as a single Astropy table
-
-Reading a single column is faster with Parquet files.
-
-```{code-cell} ipython3
-tab = Table.read(joined, include_names=['redshift'])
-```
-
-```{code-cell} ipython3
-tab
-```
-
-### Read in all data from a single file
-
-```{code-cell} ipython3
-d=pd.read_parquet(joined)
-```
-
-```{code-cell} ipython3
-len(d)
-```
-
-### Plot number counts vs halo mass
-
-```{code-cell} ipython3
-plt.figure()
-h,xbins = np.histogram(np.log10(d['halo_mass']),bins=40)
-xbins_avg = (xbins[1:]+xbins[:-1])/2.0
-plt.semilogy(xbins_avg, h)
-plt.ylabel(r'Galaxy Count')
-plt.xlabel(r'log10( M$_{\rm{halo}}$ / M$_\odot)$')
-plt.show()
-```
-
-### Plot g-r colors vs redshift
-
-```{code-cell} ipython3
-plt.figure()
-gal_clr = d['mag_g_sdss']-d['mag_r_sdss']
-plt.hist2d(d['redshift'], gal_clr, bins=100, cmap='PuBu', norm=clr.LogNorm())
-plt.colorbar(label='population density')
-plt.ylabel('Observed g-r')
-plt.xlabel('redshift')
-plt.title('Galaxy Colors in Clusters')
-plt.tight_layout()
-```
-
-### Plot r-i colors vs redshift
-
-```{code-cell} ipython3
-plt.figure()
-gal_clr = d['mag_r_sdss']-d['mag_i_sdss']
-plt.hist2d(d['redshift'], gal_clr, bins=100, cmap='PuBu',norm=clr.LogNorm())
-plt.colorbar(label='population density')
-plt.ylabel('r-i')
-plt.xlabel('redshift')
-plt.title('Galaxy Colors in Clusters')
-plt.tight_layout()
-plt.show()
-```
-
-### Read one Parquet file using Pandas, and plot a histogram of redshifts
-
-```{code-cell} ipython3
-df = pd.read_parquet(joined, columns=['redshift'])
-```
-
-```{code-cell} ipython3
-num_bins = 20
-n, bins, patches = plt.hist(df.redshift, num_bins,
-                            facecolor='blue', alpha=0.5)
-plt.xlabel('Redshift')
-plt.ylabel('Number')
-plt.title('Redshift Histogram CosmoDC2 Mock Catalog V1 Abridged')
-```
-
-### Count the number of records in all the files
-
-+++
-
-This section assumes you have downloaded all the files, or at least multiple files.
-
-```{code-cell} ipython3
-ddf = dd.read_parquet(os.path.join(inpath, '*parquet'),
-                      engine='pyarrow',
-                      columns=['stellar_mass'])
-```
-
-```{code-cell} ipython3
-%%time
-len(ddf)
-```
-
-### Read multiple files using Dask, and plot the galaxy main sequence in a redshift interval
-
-+++
-
-This section assumes you have downloaded all the files, or at least multiple files.
-Read in stellar mass and absolute magnitudes in a redshift interval from 0.5 to 0.54.
-
-```{code-cell} ipython3
-ddf = dd.read_parquet(os.path.join(inpath, '*parquet'),
-                      engine='pyarrow',
-                      columns=['stellar_mass',
-                               'Mag_true_g_lsst_z0',
-                               'Mag_true_r_lsst_z0',
-                               'Mag_true_i_lsst_z0',
-                               'Mag_true_z_lsst_z0',
-                               'Mag_true_y_lsst_z0',
-                               'redshift'],
-                      filters=[('redshift', '>', 0.5), 
-                               ('redshift', '<', 0.54)])
-```
-
-The next cell does the data read and can take several minutes.
-
-```{code-cell} ipython3
-%%time
-df2d = ddf.compute()
-```
-
-```{code-cell} ipython3
-df2d.head()
-```
-
-```{code-cell} ipython3
-len(df2d)
-```
-
-Since this results in almost 4 million galaxies, we will construct 2D histograms 
-rather than scatter plots.
-
-```{code-cell} ipython3
-
-plt.hist2d(df2d.Mag_true_r_lsst_z0, 
-           (df2d.Mag_true_g_lsst_z0 - df2d.Mag_true_r_lsst_z0),
-           bins=200, cmap='plasma', cmax = 500)
-
-# Plot a colorbar with label.
-cb = plt.colorbar()
-cb.set_label('Number')
-
-# Add title and labels to plot.
-plt.title('True magnitudes and colors in 0.5 < z < 0.54')
-plt.xlabel('LSST Mag r')
-plt.ylabel('LSST rest-frame g-r color')
-
-# Show the plot.
-plt.show()
-```
-
-```{code-cell} ipython3
-
-```

From b2b1a582ea97976dc018394696e2e04f96c6ca2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brigitta=20Sip=C5=91cz?= <bsipocz@gmail.com>
Date: Wed, 24 Jul 2024 13:46:04 -0700
Subject: [PATCH 2/5] CONT: adding cosmoDC2 TAP access tutorial

---
 tutorials/cosmodc2/cosmoDC2_TAP_access.md | 183 ++++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 tutorials/cosmodc2/cosmoDC2_TAP_access.md

diff --git a/tutorials/cosmodc2/cosmoDC2_TAP_access.md b/tutorials/cosmodc2/cosmoDC2_TAP_access.md
new file mode 100644
index 00000000..647c4b9f
--- /dev/null
+++ b/tutorials/cosmodc2/cosmoDC2_TAP_access.md
@@ -0,0 +1,183 @@
+---
+jupytext:
+  formats: md:myst
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+    jupytext_version: 1.16.2
+kernelspec:
+  display_name: Python 3 (ipykernel)
+  language: python
+  name: python3
+---
+
+
+
+This tutorial demonstrates how to access the CosmoDC2 Mock V1 catalogs. More information about these catalogs can be found here: https://irsa.ipac.caltech.edu/Missions/cosmodc2.html
+
+# Use IRSA's Virtual Observatory Table Access Protocol (TAP) Service to access these catalogs.
+
+This catalog can be accessed through IRSA's Table Access Protocol (TAP) service. See https://www.ivoa.net/documents/TAP/ for details on the protocol. This service can be accessed through Python using the pvyo library.
+
+```{code-cell} ipython3
+import pyvo as vo
+```
+
+```{code-cell} ipython3
+service = vo.dal.TAPService("https://irsa.ipac.caltech.edu/TAP")
+```
+
+## List the available DC2 tables
+
+```{code-cell} ipython3
+tables = service.tables
+for tablename in tables.keys():
+    if not "tap_schema" in tablename:
+        if "dc2" in tablename:
+            tables[tablename].describe()
+```
+
+## Choose the DC2 catalog you want to work with.
+
+IRSA currently offers 3 versions of the DC2 catalog.
+
+* ``cosmodc2mockv1_new`` has been optimized to make searches with constraints on stellar mass and redshift fast.
+
+* ``cosmodc2mockv1`` has been optimized to make searches with spatial constraints fast.
+
+* ``cosmodc2mockv1_heavy`` is the same as ``cosmodc2mockv1_new``, except that it does not contain galaxies with stellar masses <= 10^7 solar masses.
+
+If you are new to the DC2 catalog, we recommend that you start with ``cosmodc2mockv1_heavy``
+
+```{code-cell} ipython3
+# Choose the abridged table to start with.
+# Queries should be faster on smaller tables.
+
+tablename = 'cosmodc2mockv1_heavy'
+```
+
+## How many rows are in the chosen table?
+
+With TAP, you can query catalogs with constraints specified in IVOA Astronomical Data Query Language (ADQL; https://www.ivoa.net/documents/latest/ADQL.html), which is based on SQL.
+
+```{code-cell} ipython3
+# For example, this snippet of ADQL counts the number of elements in 
+# the redshift column of the table we chose.
+adql = f"SELECT count(redshift) FROM {tablename}"
+adql
+```
+
+```{code-cell} ipython3
+# In order to use TAP with this ADQL string using pyvo, you can do the following:
+result = service.search(adql)
+result
+```
+
+The above query shows that there are 597,488,849 redshifts in this table.
+
++++
+
+## What is the default maximum number of rows returned by the service?
+
+This service will return a maximum of 2 billion rows by default.
+
+```{code-cell} ipython3
+service.maxrec
+```
+
+This default maximum can be changed, and there is no hard upper limit to what it can be changed to.
+
+```{code-cell} ipython3
+print(service.hardlimit)
+```
+
+## List the columns in the chosen table
+
+This table contains 301 columns.
+
+```{code-cell} ipython3
+columns = tables[tablename].columns
+print(len(columns))
+```
+
+Let's learn a bit more about them.
+
+```{code-cell} ipython3
+for col in columns:
+    print(f'{f"{col.name}":30s}  {col.description}')
+```
+
+## Create a histogram of redshifts
+
+Let's figure out what redshift range these galaxies cover. Since we found out above that it's a large catalog, we can start with a spatial search over a small area of 0.1 deg. The ADQL that is needed for the spatial constraint is:
+
+```{code-cell} ipython3
+adql = f"SELECT redshift FROM {tablename} WHERE CONTAINS(POINT('ICRS', RAMean, DecMean), CIRCLE('ICRS',54.218205903,-37.497959343,.1))=1"
+adql
+```
+
+Now we can use the previously-defined service to execute the query with the spatial contraint.
+
+```{code-cell} ipython3
+cone_results = service.search(adql)
+```
+
+```{code-cell} ipython3
+# Plot a histogram
+import numpy as np
+import matplotlib.mlab as mlab
+import matplotlib.pyplot as plt
+
+num_bins = 20
+# the histogram of the data
+n, bins, patches = plt.hist(cone_results['redshift'], num_bins, 
+                            facecolor='blue', alpha = 0.5)
+plt.xlabel('Redshift')
+plt.ylabel('Number')
+plt.title('Redshift Histogram CosmoDC2 Mock Catalog V1 abridged')
+```
+
+We can easily see form this plot that the simulated galaxies go out to z = 3.
+
++++
+
+## Visualize galaxy colors at z ~ 0.5
+
+Now let's visualize the galaxy main sequence at z = 2.0. First, we'll do a narrow redshift cut with no spatial constraint.
+
+Let's do it as an asynchronous search since this might take awhile.
+
+```{code-cell} ipython3
+service = vo.dal.TAPService("https://irsa.ipac.caltech.edu/TAP")
+adql = f"SELECT Mag_true_r_sdss_z0, Mag_true_g_sdss_z0, redshift FROM {tablename} WHERE redshift > 0.5 and redshift < 0.54"
+results = service.run_async(adql)
+```
+
+```{code-cell} ipython3
+len(results['mag_true_r_sdss_z0'])
+```
+
+```{code-cell} ipython3
+# Since this results in almost 4 million galaxies, 
+# we will construct a 2D histogram rather than a scatter plot.
+plt.hist2d(results['mag_true_r_sdss_z0'], results['mag_true_g_sdss_z0']-results['mag_true_r_sdss_z0'],
+           bins=200, cmap='plasma', cmax=500)
+
+# Plot a colorbar with label.
+cb = plt.colorbar()
+cb.set_label('Number')
+
+# Add title and labels to plot.
+plt.xlabel('SDSS Mag r')
+plt.ylabel('SDSS rest-frame g-r color')
+
+# Show the plot.
+plt.show()
+```
+
+## About this notebook
+
+ * Author: Vandana Desai (IRSA Science Lead)
+ * Updated: 2024-07-24
+ * Contact: https://irsa.ipac.caltech.edu/docs/help_desk.html

From f3b9b38615326abf80ce6cb0b30a3151e7798047 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brigitta=20Sip=C5=91cz?= <bsipocz@gmail.com>
Date: Wed, 24 Jul 2024 13:48:50 -0700
Subject: [PATCH 3/5] Adding cosmoDC2 tutorial to the page

---
 index.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/index.md b/index.md
index 0181cdde..3c25e0e4 100644
--- a/index.md
+++ b/index.md
@@ -25,7 +25,6 @@ tutorials/irsa-sia-examples/siav2_seip
 
 ```
 
-<!---
 ### Catalogs
 
 ```{toctree}
@@ -33,9 +32,13 @@ tutorials/irsa-sia-examples/siav2_seip
 maxdepth: 1
 ---
 
+tutorials/cosmodc2/cosmoDC2_TAP_access.md
 
 ```
 
+
+<!---
+
 ## Visualizations
 ```{toctree}
 ---

From 4d72c6c61b62471f74f1c9649952b2e35e4913ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brigitta=20Sip=C5=91cz?= <bsipocz@gmail.com>
Date: Wed, 24 Jul 2024 20:03:40 -0700
Subject: [PATCH 4/5] Update ignore testing list

---
 ignore_testing | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ignore_testing b/ignore_testing
index 85c793e7..c0c1231e 100644
--- a/ignore_testing
+++ b/ignore_testing
@@ -1,2 +1 @@
-tutorials/cosmosims
 tutorials/parallelize

From e748759ac4aad6135ab369896fd2ccb70b192a75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brigitta=20Sip=C5=91cz?= <bsipocz@gmail.com>
Date: Wed, 24 Jul 2024 21:12:55 -0700
Subject: [PATCH 5/5] Shortening title for TOC

---
 tutorials/cosmodc2/cosmoDC2_TAP_access.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tutorials/cosmodc2/cosmoDC2_TAP_access.md b/tutorials/cosmodc2/cosmoDC2_TAP_access.md
index 647c4b9f..50782382 100644
--- a/tutorials/cosmodc2/cosmoDC2_TAP_access.md
+++ b/tutorials/cosmodc2/cosmoDC2_TAP_access.md
@@ -14,11 +14,11 @@ kernelspec:
 
 
 
-This tutorial demonstrates how to access the CosmoDC2 Mock V1 catalogs. More information about these catalogs can be found here: https://irsa.ipac.caltech.edu/Missions/cosmodc2.html
+# CosmoDC2 Mock v1 catalogs with IRSA TAP
 
-# Use IRSA's Virtual Observatory Table Access Protocol (TAP) Service to access these catalogs.
+This tutorial demonstrates how to access the CosmoDC2 Mock V1 catalogs. More information about these catalogs can be found here: https://irsa.ipac.caltech.edu/Missions/cosmodc2.html
 
-This catalog can be accessed through IRSA's Table Access Protocol (TAP) service. See https://www.ivoa.net/documents/TAP/ for details on the protocol. This service can be accessed through Python using the pvyo library.
+These catalogs can be accessed through IRSA's Virtual Ovservatory Table Access Protocol (TAP) service. See https://www.ivoa.net/documents/TAP/ for details on the protocol. This service can be accessed through Python using the PyVO library.
 
 ```{code-cell} ipython3
 import pyvo as vo
@@ -62,7 +62,7 @@ tablename = 'cosmodc2mockv1_heavy'
 With TAP, you can query catalogs with constraints specified in IVOA Astronomical Data Query Language (ADQL; https://www.ivoa.net/documents/latest/ADQL.html), which is based on SQL.
 
 ```{code-cell} ipython3
-# For example, this snippet of ADQL counts the number of elements in 
+# For example, this snippet of ADQL counts the number of elements in
 # the redshift column of the table we chose.
 adql = f"SELECT count(redshift) FROM {tablename}"
 adql
@@ -131,7 +131,7 @@ import matplotlib.pyplot as plt
 
 num_bins = 20
 # the histogram of the data
-n, bins, patches = plt.hist(cone_results['redshift'], num_bins, 
+n, bins, patches = plt.hist(cone_results['redshift'], num_bins,
                             facecolor='blue', alpha = 0.5)
 plt.xlabel('Redshift')
 plt.ylabel('Number')
@@ -159,7 +159,7 @@ len(results['mag_true_r_sdss_z0'])
 ```
 
 ```{code-cell} ipython3
-# Since this results in almost 4 million galaxies, 
+# Since this results in almost 4 million galaxies,
 # we will construct a 2D histogram rather than a scatter plot.
 plt.hist2d(results['mag_true_r_sdss_z0'], results['mag_true_g_sdss_z0']-results['mag_true_r_sdss_z0'],
            bins=200, cmap='plasma', cmax=500)