# WALLABY public data release notebook

This notebook is intended to support with exporting the WALLABY source and kinematic data tables and associated products to be ingested into a public archive (CADC and CASDA). It is intended that the user of this notebook will be a member of the WALLABY project team, or a member of the WALLABY TWG7 group.

---

In [33]:
import os
import shutil
import getpass
import requests
import getpass
import pyvo as vo
from pyvo.auth import authsession, securitymethods
import numpy as np
from astropy.io import ascii
from astropy.wcs import WCS
from astropy.io.votable import from_table, parse_single_table
from astropy.table import vstack

### Authenticate

<span style="font-weight: bold; color: #FF0000;">⚠ Update the cell below with your username and enter your password</span>

In [2]:
# Enter WALLABY user username and password

username = 'wallaby_user'
password = getpass.getpass('Enter your password')

Enter your password ········


In [3]:
# Connect with TAP service

URL = "https://wallaby.aussrc.org/tap"
auth = vo.auth.AuthSession()
auth.add_security_method_for_url(URL, vo.auth.securitymethods.BASIC)
auth.credentials.set_password(username, password)
tap = vo.dal.TAPService(URL, session=auth)

---

# 1. Decide Release

Determine which internal releases you would like to bundle in this public data release. You will also need to set a name for this public data release.

In [4]:
# Get all tags

query = "SELECT * FROM wallaby.tag"
votable = tap.search(query)
table = votable.to_table()
print(table)

 id            name           ...          added_at         
--- ------------------------- ... --------------------------
  2                 Norma DR1 ... 2022-02-17T00:49:41.748948
  3              NGC 5044 DR1 ... 2022-02-17T05:20:01.326107
  6                 Hydra DR1 ... 2022-02-23T04:49:08.557440
  7                 Hydra DR2 ... 2022-02-23T04:49:22.070213
 12              NGC 5044 DR3 ... 2022-10-24T08:44:05.774250
 10              NGC 5044 DR2 ... 2022-06-10T08:55:44.842245
  9              NGC 4808 DR1 ... 2022-05-27T08:18:00.781334
...                       ... ...                        ...
 16   WALLABY Single Coverage ... 2023-06-15T07:59:23.960981
  5                 Multiplet ... 2022-02-23T03:30:02.628721
 17              Questionable ... 2023-11-17T05:57:27.147680
  4                 Component ... 2022-02-17T07:09:19.996888
 18                   WALLABY ... 2023-11-28T07:28:04.877718
 19      Pilot 2 High-Res DR1 ... 2023-12-09T05:13:35.114331
 20    WALLABY Pilot Gal

<span style="font-weight: bold; color: #FF0000;">⚠ Update the cell below. Add tags to the list for release, and update `release_name` variable</span>

In [5]:
# List of tags
tags = ['Hydra DR2']

# Release name
release_name_raw = "WALLABY Test PDR"
release_name = release_name_raw.replace(' ', '_')

---

# 2. Sources

## Catalog

In [6]:
# Retrieve catalog as Astropy table

query = """SELECT d.*, ivo_string_agg(t.name || ': ' || t.description, '; ') AS tags, ivo_string_agg(c.comment, '; ') AS comments
        FROM wallaby.detection d
        LEFT JOIN wallaby.tag_detection td ON d.id = td.detection_id 
        LEFT JOIN wallaby.tag t ON t.id = td.tag_id
        LEFT JOIN wallaby.comment c ON d.id = c.detection_id
        WHERE t.name IN ('Internal Data Release', '$TAG_NAME')
        GROUP BY d.id"""

In [7]:
table = None
for idx, tag_name in enumerate(tags):
    q = query.replace('$TAG_NAME', tag_name)
    result = tap.search(q)
    if idx == 0:
        table = result.to_table()
        table['SRCTR'] = tag_name.replace(' ', '_').replace('DR', 'TR')
    else:
        new_table = result.to_table()
        new_table['SRCTR'] = tag_name.replace(' ', '_').replace('DR', 'TR')
        table = vstack([table, new_table])

table = table[0:20]
table

id,name,run_id,instance_id,access_url,access_format,source_name,x,y,z,x_min,x_max,y_min,y_max,z_min,z_max,n_pix,f_min,f_max,f_sum,rel,flag,rms,w20,w50,ell_maj,ell_min,ell_pa,ell3s_maj,ell3s_min,ell3s_pa,kin_pa,err_x,err_y,err_z,err_f_sum,ra,dec,freq,l,b,v_rad,v_opt,v_app,tags,comments,SRCTR
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,pix,pix,pix,pix,pix,pix,pix,pix,pix,Unnamed: 16_level_1,Jy / beam,Jy / beam,Hz Jy,Unnamed: 20_level_1,Unnamed: 21_level_1,Jy / beam,Hz,Hz,pix,pix,deg,pix,pix,deg,deg,pix,pix,pix,Hz Jy,deg,deg,Hz,deg,deg,m / s,m / s,m / s,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
int64,object,int64,int64,object,object,object,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,int32,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,object,object,str9
149,SoFiA_J100321-291708,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=149,application/x-votable+xml;content=datalink,WALLABY J100321-291708,7236.00103934727,1423.16043832819,2972.96363051423,1791.0,1804.0,816.0,829.0,805.0,833.0,1931.0,-0.007087035104632,0.022576590999961,3608.83947053987,0.837153297313005,0,0.004161843859869,474090.261563014,435196.324690321,6.06182663266344,4.83124509672712,-17.974746164381,5.6299282273289,4.25512857362692,8.96305187790603,221.42043177081,0.339166141990331,0.387283671397269,0.904869338342384,634.477130140317,150.838754090075,-29.2856143600128,1350554882.04651,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,No optical counterpart? False positive?,Hydra_TR2
150,SoFiA_J100336-262923,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=150,application/x-votable+xml;content=datalink,WALLABY J100336-262923,7243.89607023118,3100.64976425096,6515.13057852051,1792.0,1814.0,253.0,276.0,1546.0,1579.0,6213.0,-0.013497338630259,0.020171672105789,9561.18173978009,0.947242992874895,4,0.004799790359768,532207.054794837,244939.520422038,8.59140342476685,7.49182730436655,26.846619313103,5.34486369152227,4.16510312187258,38.2451931572564,81.2713489724864,0.330665765631941,0.351869419373389,0.637651975476422,1312.53834162702,150.903417524457,-26.4899525051286,1416150566.26878,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
151,SoFiA_J100342-270137,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=151,application/x-votable+xml;content=datalink,WALLABY J100342-270137,7224.35618272415,2778.7496873355,6500.75774489546,1748.0,1821.0,1034.0,1094.0,1514.0,1577.0,58081.0,-0.017361178994179,0.054538358002901,223979.669957672,1.0,4,0.003880673716117,934457.110944415,878868.02848142,31.970546978407,11.7695820816275,56.0422663867539,35.6818369363527,13.2859705745635,55.2042495392413,236.449055457261,0.166066104384125,0.135158991760395,0.179486683043718,3244.61539570995,150.925581251107,-27.0271233410342,1415884402.68313,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
152,SoFiA_J100351-263707,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=152,application/x-votable+xml;content=datalink,WALLABY J100351-263707,7209.42150741268,3024.27464027696,6522.00893003091,1747.0,1789.0,156.0,209.0,1546.0,1598.0,24842.0,-0.013893693685532,0.040277618914843,41787.13431873,1.0,4,0.003881131099795,725769.491888538,364646.092383804,19.1225473491213,11.3426865296583,8.80349435744384,15.6311006583697,11.3887187134962,22.813776978591,31.5818636387107,0.210907501690365,0.351398129218765,0.317791235122476,2122.22117985972,150.964352979355,-26.6186126212923,1416277943.1486,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
153,SoFiA_J100351-273417,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=153,application/x-votable+xml;content=datalink,WALLABY J100351-273417,7194.87750349286,2452.73990067864,6037.08958285272,1730.0,1781.0,692.0,770.0,1034.0,1130.0,54454.0,-0.014263059012592,0.028143471106887,73139.8317373718,1.0,4,0.003520469398751,1593269.12293363,1455284.43738326,27.144747180945,15.4975973522364,-8.7047944413829,26.4958361384957,15.5860954672419,-5.83646319968497,12.4338631983665,0.192764504273769,0.333358991030242,0.600042162027366,2850.06339301412,150.966205246492,-27.5715028775967,1407297955.2379,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
154,SoFiA_J100426-282638,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=154,application/x-votable+xml;content=datalink,WALLABY J100426-282638,7106.81531084845,1931.09812309889,6468.79402106765,1635.0,1703.0,186.0,246.0,1478.0,1546.0,51626.0,-0.014277187176049,0.056374944746494,161361.426596455,1.0,4,0.002959179885522,1002122.24103427,924331.340002807,25.1025278592234,10.4713187344896,87.2794461341103,29.6631739118679,11.7630427644661,87.4872404354311,87.3022426056449,0.15446130522244,0.088904650210685,0.184395742684489,2332.62355942297,151.109056583226,-28.444156643408,1415292481.8715,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,SoFiA_J100713-262336,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=162,application/x-votable+xml;content=datalink,WALLABY J100713-262336,6759.91229481986,3168.62387129379,5586.01618395469,1308.0,1333.0,321.0,346.0,607.0,650.0,9318.0,-0.006459211930633,0.013795930892229,10179.4842349865,0.999999999996601,0,0.001980107318163,687441.423014595,592569.388730845,8.65605259076825,7.38283499904731,86.0166215957663,8.66111868981296,6.47019736473666,-89.0783771432741,250.844780304426,0.22144066745694,0.200134880127347,0.466712037227218,663.115431781383,151.807407783037,-26.3934718307268,1398944744.14721,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
163,SoFiA_J100720-262426,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=163,application/x-votable+xml;content=datalink,WALLABY J100720-262426,6744.23231498384,3160.55075791553,2727.09455075875,1297.0,1311.0,317.0,334.0,532.0,601.0,4154.0,-0.004440364427865,0.009577467106283,3372.04833411086,0.994740482392223,0,0.001741084303666,1139646.80900798,619934.705680155,5.87961053296285,5.10902538598235,-22.7022721900455,7.70337067469183,5.09262527601622,47.1550462222191,221.043972263814,0.221879482845253,0.2510128525382,1.3552399822177,389.30689827525,151.836347674689,-26.4073734412492,1346001750.93993,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2
164,SoFiA_J100746-281451,2,2,https://wallaby.aussrc.org/survey/vo/dl/dlmeta?ID=164,application/x-votable+xml;content=datalink,WALLABY J100746-281451,6669.33701290677,2057.58917577332,5561.26048800659,1218.0,1238.0,331.0,347.0,580.0,627.0,2580.0,-0.004103654995561,0.011605833657086,2081.89037681755,0.999842410192386,0,0.001852541111744,798091.389327351,594501.84543961,7.24964076543167,4.76518270254582,-56.1132742578163,9.26116290612236,3.1861597490365,-60.6082154642124,115.34912059815,0.365357006499185,0.337324947476304,1.34633804686549,326.450065814047,151.942761184464,-28.247535710319,1398486305.33335,--,--,--,--,--,Hydra DR2: Hydra DR2 data release,,Hydra_TR2


## Modifying the catalog

There are some additional columns and calculated properties that are required for the release. The column metadata (e.g. UCDs, units, description etc as required to conform with VO standards) also need to be included for these additional columns. These include:

| Column | Description |
| --- | --- |
| `qflag` |  |
| `kflag` | column to indicate whether or not there is a kinematic model associated with the detection |
| `team_release` | Column with the release name |
| `f_sum_corr` | |
| `err_f_sum_corr` | |
| `dist_h` | |
| `log_m_hi_corr` | Uses `v_est` and `dist_est` which are calculated properties |

In [8]:
# Table corrections

rest_freq = 1.42040575179E+09
c = 2.9979245e8
H0 = 70.0

write_table = table.copy()
write_table['name'] = table['source_name']
write_table['qflag'] = table['flag']
write_table['kflag'] = np.zeros(len(table['flag']))
write_table['team_release'] = release_name_raw
write_table['f_sum_corr'] = table['f_sum'] / 10.0 ** (0.0285 * np.log10(table['f_sum'])**3.0 -0.439 * np.log10(table['f_sum'])**2.0 + 2.294 * np.log10(table['f_sum']) - 4.097)
write_table['err_f_sum_corr'] = table['err_f_sum'] / table['f_sum'] * write_table['f_sum_corr']
write_table['v_est'] = ((rest_freq - table['freq']) / table['freq'] * c / 1000.0)
write_table['dist_h'] = write_table['v_est'] / H0
write_table['log_m_hi'] = np.log10(49.7 * write_table['dist_h']**2.0 * table['f_sum'])
write_table['log_m_hi_corr'] = np.log10(49.7 * write_table['dist_h']**2.0 * write_table['f_sum_corr'])

In [9]:
# Remove certain columns from the astropy table

write_table.remove_columns(['id', 'run_id', 'instance_id', 'access_url', 'access_format', 'source_name', 'flag', 'v_est', 'l', 'b', 'v_rad', 'v_opt', 'v_app', 'tags', 'SRCTR'])
votable = from_table(write_table)

In [10]:
# Update derived quantity columns of votable

f_sum_corr_field = votable.get_field_by_id('f_sum_corr')
f_sum_corr_field.ucd = "phot.flux;meta.main"
f_sum_corr_field.unit = "Jy*Hz"
f_sum_corr_field.description = "The integrated flux within 3D source mask statistically corrected to match single dish observations"

err_f_sum_corr_field = votable.get_field_by_id('err_f_sum_corr')
err_f_sum_corr_field.ucd = "stat.error;phot.flux"
err_f_sum_corr_field.unit = "Jy*Hz"
err_f_sum_corr_field.description = "Statistical uncertainty of the single dish corrected integrated flux"

dist_h_field = votable.get_field_by_id('dist_h')
dist_h_field.ucd = "pos.distance"
dist_h_field.unit = "Mpc"
dist_h_field.description = "Local Hubble distance derived from the barycentric source frequency"

log_m_hi_field = votable.get_field_by_id('log_m_hi')
log_m_hi_field.ucd = "phys.mass"
log_m_hi_field.unit = "log10(Msol)"
log_m_hi_field.description = "The estimated log10 mass of the cube using f_sum and freq"

log_m_hi_corr_field = votable.get_field_by_id('log_m_hi_corr')
log_m_hi_corr_field.ucd = "phys.mass"
log_m_hi_corr_field.unit = "log10(Msol)"
log_m_hi_corr_field.description = "The estimated log10 mass of the cube using f_sum_corr and freq"

qflag_field = votable.get_field_by_id('qflag')
qflag_field.datatype = "double"
qflag_field.ucd = "meta.code.qual"
qflag_field.description = "Quality flag"

kflag_field = votable.get_field_by_id('kflag')
kflag_field.datatype = "double"
kflag_field.ucd = "meta.code"
kflag_field.description = "Kinematic model flag"

comments_field = votable.get_field_by_id('comments')
comments_field.datatype = "char"
comments_field.ucd = "meta.note"
comments_field.description = "Comments on individual sources"

team_release_field = votable.get_field_by_id('team_release')
team_release_field.datatype = "char"
team_release_field.ucd = "meta.dataset;meta.main"
team_release_field.description = "Internal team release identifier"



In [11]:
print(write_table.columns)
print(len(write_table.columns))

<TableColumns names=('name','x','y','z','x_min','x_max','y_min','y_max','z_min','z_max','n_pix','f_min','f_max','f_sum','rel','rms','w20','w50','ell_maj','ell_min','ell_pa','ell3s_maj','ell3s_min','ell3s_pa','kin_pa','err_x','err_y','err_z','err_f_sum','ra','dec','freq','comments','qflag','kflag','team_release','f_sum_corr','err_f_sum_corr','dist_h','log_m_hi','log_m_hi_corr')>
41


In [12]:
# Download catalog table

votable.version = '1.3'
votable_filename = f'{release_name}_SourceCatalogue.xml'
votable.to_xml(votable_filename)

In [13]:
# Prepare header table

header_table = write_table.copy()
header_table.remove_rows(slice(0, len(header_table), 1))
header_table
header_votable = from_table(header_table)
header_votable.version = '1.3'

## Products

In [14]:
# useful function for downloading table products (requires authentication)

def download_products(row, products_filename, chunk_size=8192):
    """Download products for a row of the table (a detection entry)
    
    """
    name = row['source_name']
    access_url = row['access_url']
    votable = parse_single_table(access_url)
    product_table = votable.to_table()
    url = product_table[product_table['description'] == 'SoFiA-2 Detection Products'][0]['access_url']
    with requests.get(url, auth=(username, password), stream=True) as r:
        r.raise_for_status()
        with open(products_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    print(f'Downloaded completed for {name}')
    return

def download_table_products(table, directory, chunk_size=8192):
    """Download WALLABY products from ADQL queried table

    """
    if not os.path.exists(directory):
        os.mkdir(directory)
    print(f'Saving products to {directory}')
    for row in table:
        name = row['source_name']
        products_filename = os.path.join(directory, f'{name}.tar')
        download_products(row, products_filename, chunk_size)
    print('Downloads complete')
    return

In [15]:
# Write output products for a source

download_table_products(table[0:20], release_name)

Saving products to WALLABY_Test_PDR
Downloaded completed for WALLABY J100321-291708
Downloaded completed for WALLABY J100336-262923
Downloaded completed for WALLABY J100342-270137
Downloaded completed for WALLABY J100351-263707
Downloaded completed for WALLABY J100351-273417
Downloaded completed for WALLABY J100426-282638
Downloaded completed for WALLABY J100539-282633
Downloaded completed for WALLABY J100555-291840
Downloaded completed for WALLABY J100634-295615
Downloaded completed for WALLABY J100640-273917
Downloaded completed for WALLABY J100656-251731
Downloaded completed for WALLABY J100700-273944
Downloaded completed for WALLABY J100707-262300
Downloaded completed for WALLABY J100713-262336
Downloaded completed for WALLABY J100720-262426
Downloaded completed for WALLABY J100746-281451
Downloaded completed for WALLABY J100752-250626
Downloaded completed for WALLABY J100808-260942
Downloaded completed for WALLABY J100827-270707
Downloaded completed for WALLABY J100830-262140
Down

In [16]:
# Update product files

import tarfile
import glob
from astropy.io import fits
import matplotlib.pyplot as plt

In [17]:
def pixel_hdu(ra, dec, frequency):
    """Create dummy pixel hdu element (TBA)
    
    """
    return

def spectrum_to_fits(f_in, f_cube, f_out, ra, dec, frequency):
    """Convert the SoFiA-2 output spectrum .txt file to a .fits file for public release
    Writes a HDUList object with:
        - dummy pixel (fits metadata from the accompanying _cube.fits file
        - fits binary table with spectra.txt content
        
    """
    # read spectrum and construct binary table
    channels, freq, flux_density, pixels = np.loadtxt(f_in, skiprows=38, unpack=True, usecols=[0,1,2,3])    
    channels_col = fits.Column(name='Channel', format='D', array=channels.astype('int'), unit='')
    freq_col = fits.Column(name='Frequency', format='E', array=freq, unit='Hz')
    flux_density_col = fits.Column(name='Flux density', format='E', array=flux_density, unit='Jy')
    pixel_col = fits.Column(name='Pixels', format='D', array=pixels.astype('int'), unit='')
    fits_table = fits.BinTableHDU.from_columns([channels_col, freq_col, flux_density_col, pixel_col])
    
    # construct dummy image hdu
    keys = ['OBJECT', 'CDELT1', 'CDELT2', 'CDELT3', 'CTYPE1', 'CTYPE2', 'CTYPE3', 'ORIGIN', 'EQUINOX', 'LONPOLE', 'LATPOLE', 'SRCVERS', 'SRCTR']
    # keys += ['SBID']
    with fits.open(f_cube, mode='readonly') as hdu_cube:
        header_cube = hdu_cube[0].header
    hdu = fits.PrimaryHDU()
    header = hdu.header
    hdu.data = np.array([[[0]]]).astype('int16')
    header['CRPIX1'] = 0
    header['CRPIX2'] = 0
    header['CRPIX3'] = 0
    header['CUNIT1'] = 'deg'
    header['CUNIT2'] = 'deg'
    header['CUNIT3'] = 'Hz'
    header['CRVAL1'] = ra
    header['CRVAL2'] = dec
    header['CRVAL3'] = frequency
    header['SPECSYS'] = 'BARYCENT'
    header['RADESYS'] = 'FK5'
    for k in keys:
        header.set(k, header_cube[k])    

    # construct hdulist and write to file
    hdu_list = fits.HDUList([hdu, fits_table])
    hdu_list.writeto(f_out, overwrite=True, output_verify='fix')
    return

def mom0_to_png(data, f_out):
    """Create plot of mom0 map as png file for archive cutouts

    """
    plt.imshow(data)
    plt.axis('off')
    plt.savefig(f_out, bbox_inches='tight', pad_inches=0)
    plt.close()
    return

In [18]:
# Update all product files

product_tarfiles = glob.glob(os.path.join(release_name, '*.tar'))
product_files = [f.replace('.tar', '') for f in product_tarfiles]

# Extract
for f in product_tarfiles:
    filename = f.replace('.tar', '')
    with tarfile.open(f) as tf:
        tf.extractall(path=filename)
    # os.remove(f)

# Update product files
for idx_pf, pf in enumerate(product_files):
    print(f'Folder {pf} [{idx_pf + 1}/{len(product_files)}]')
    fits_files = glob.glob(os.path.join(pf, '*.fits'))
    for idx_ff, ff in enumerate(fits_files):
        print(f'[{idx_ff + 1}/{len(fits_files)}] {ff}')
        source_name = ff.split('/')[1]
        with fits.open(ff, mode='update') as hdul:
            header = hdul[0].header
            # NOTE: DATE card?
            header['SRCVERS'] = header['ORIGIN']  # Get SoFiA version from ORIGIN header
            header['SRCTR'] = release_name
            header['OBJECT'] = source_name
            hdul.flush()

            # Download moment 0 map figure
            if 'mom0.fits' in ff:
                print(f'[{idx_ff + 1}/{len(fits_files)}] Saving mom0 png figure')
                data = hdul[0].data
                mom0_png = ff.replace('.fits', '.png')
                mom0_to_png(data, mom0_png)

    # Update spectra
    print('Creating spec.fits file')
    spectra_files = glob.glob(os.path.join(pf, '*spec.txt'))
    assert len(spectra_files) == 1, 'Should only be 1 spectrum file per detection'
    spec_f_in = spectra_files[0]
    spec_f_out = spec_f_in.replace('.txt', '.fits')
    spec_f_cube = spec_f_in.replace('spec.txt', 'cube.fits')
    assert os.path.exists(spec_f_cube), 'Cutout cube corresponding to spectra file does not exist'
    row = write_table[write_table['name'] == source_name][0]
    spectrum_to_fits(spec_f_in, spec_f_cube, spec_f_out, row['ra'], row['dec'], row['freq'])

Folder WALLABY_Test_PDR/WALLABY J100321-291708 [1/20]
[1/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_spec.fits
[2/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_cube.fits
[3/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_mom2.fits
[4/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_mask.fits
[5/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_mom1.fits
[6/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_chan.fits
[7/7] WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_mom0.fits
[7/7] Saving mom0 png figure
Creating spec.fits file
Folder WALLABY_Test_PDR/WALLABY J100720-262426 [2/20]
[1/7] WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_spec.fits
[2/7] WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_mom2.fits
[3/7] WALLABY_Test_PDR/WALLABY J100720

---

## CASDA release

Move product files to required directory structure for CASDA public data releases:

- catalogue (VOTable version 1.3)
- cubelets (mask and cube files)
- moment_maps (all moment maps, including a mom0 .png file if you want a preview)
- spectra (.spec file in fits format)

File formats are: `f'{WALLABY_name}_{release_version}.fits`

In [19]:
# copy files over for CASDA release

# Create directory_structure
basedir = os.path.join('CASDA', release_name)
os.makedirs(os.path.join(basedir, 'catalogue'), exist_ok=True)
os.makedirs(os.path.join(basedir, 'cubelets'), exist_ok=True)
os.makedirs(os.path.join(basedir, 'moment_maps'), exist_ok=True)
os.makedirs(os.path.join(basedir, 'spectra'), exist_ok=True)

# Copy catalogue xml
shutil.copy(votable_filename, os.path.join(basedir, 'catalogue', votable_filename))

# Copy product files
for idx_pf, pf in enumerate(product_files):
    source_name = pf.split('/')[1].replace(' ', '_')
    print(f'Source {source_name} [{idx_pf + 1}/{len(product_files)}]')
    row = table[table['source_name'] == source_name.replace('_', ' ')][0]
    srctr = row['SRCTR']
    p_files = glob.glob(os.path.join(pf, '*'))
    for f in p_files:
        suffix = f.rsplit('_', 1)[1]
        new_filename = f'{source_name}_{srctr}_{release_name}_{suffix}'
        
        # moment maps
        if any([t in suffix for t in ['mom0', 'mom1', 'mom2', 'chan']]):
            shutil.copy(f, os.path.join(basedir, 'moment_maps', new_filename))
            
        # cubelets
        elif any([t in suffix for t in ['cube', 'mask']]):
            shutil.copy(f, os.path.join(basedir, 'cubelets', new_filename))
        
        # spectra
        elif any([t in suffix for t in ['spec.fits']]):
            shutil.copy(f, os.path.join(basedir, 'spectra', new_filename))
        
        else:
            print(f'Skipping file {f}')

Source WALLABY_J100321-291708 [1/20]
Skipping file WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_plot.png
Skipping file WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_spec.txt
Source WALLABY_J100720-262426 [2/20]
Skipping file WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_plot.png
Skipping file WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_spec.txt
Source WALLABY_J100336-262923 [3/20]
Skipping file WALLABY_Test_PDR/WALLABY J100336-262923/Hydra_DR2_2_SoFiA_J100336-262923_spec.txt
Skipping file WALLABY_Test_PDR/WALLABY J100336-262923/Hydra_DR2_2_SoFiA_J100336-262923_plot.png
Source WALLABY_J100830-262140 [4/20]
Skipping file WALLABY_Test_PDR/WALLABY J100830-262140/Hydra_DR2_2_SoFiA_J100830-262140_spec.txt
Skipping file WALLABY_Test_PDR/WALLABY J100830-262140/Hydra_DR2_2_SoFiA_J100830-262140_plot.png
Source WALLABY_J100707-262300 [5/20]
Skipping file WALLABY_Test_PDR/WALLABY J100707-

## CADC Release

Create VOTable object for the metadata, but export the catalogue data as a CSV file. Copy product files for CADC public data release required file structure.

- Each detection (file format: WALLABY name, release version) folder contains the product files
- Each product file has the format: `f'{WALLABY_name}_{internal_release_version}_{release_version}_<ext>.fits`

In [20]:
# copy files over for CADC release

# Create directory_structure
basedir = os.path.join('CADC', release_name)
os.makedirs(basedir, exist_ok=True)

# Write catalog
ascii.write(write_table, os.path.join(basedir, votable_filename.replace('.xml', '.csv')), format='csv', overwrite=True)

# Copy product files
for idx_pf, pf in enumerate(product_files):
    source_name = pf.split('/')[1].replace(' ', '_')
    row = table[table['source_name'] == source_name.replace('_', ' ')][0]
    srctr = row['SRCTR']
    print(f'Source {source_name} [{idx_pf + 1}/{len(product_files)}]')
    source_dir = os.path.join(basedir, f'{source_name}_{release_name}')
    os.makedirs(source_dir, exist_ok=True)
    p_files = glob.glob(os.path.join(pf, '*'))
    for f in p_files:
        suffix = f.rsplit('_', 1)[1]
        new_filename = f'{source_name}_{srctr}_{release_name}_{suffix}'
        if any([t in suffix for t in ['mom0', 'mom1', 'mom2', 'chan', 'cube', 'mask', 'spec.fits']]):
            shutil.copy(f, os.path.join(source_dir , new_filename))
        else:
            print(f'Skipping file {f}')

Source WALLABY_J100321-291708 [1/20]
Skipping file WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_plot.png
Skipping file WALLABY_Test_PDR/WALLABY J100321-291708/Hydra_DR2_2_SoFiA_J100321-291708_spec.txt
Source WALLABY_J100720-262426 [2/20]
Skipping file WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_plot.png
Skipping file WALLABY_Test_PDR/WALLABY J100720-262426/Hydra_DR2_2_SoFiA_J100720-262426_spec.txt
Source WALLABY_J100336-262923 [3/20]
Skipping file WALLABY_Test_PDR/WALLABY J100336-262923/Hydra_DR2_2_SoFiA_J100336-262923_spec.txt
Skipping file WALLABY_Test_PDR/WALLABY J100336-262923/Hydra_DR2_2_SoFiA_J100336-262923_plot.png
Source WALLABY_J100830-262140 [4/20]
Skipping file WALLABY_Test_PDR/WALLABY J100830-262140/Hydra_DR2_2_SoFiA_J100830-262140_spec.txt
Skipping file WALLABY_Test_PDR/WALLABY J100830-262140/Hydra_DR2_2_SoFiA_J100830-262140_plot.png
Source WALLABY_J100707-262300 [5/20]
Skipping file WALLABY_Test_PDR/WALLABY J100707-

## Data Central

Exporting to Data Central requires the generation of various .txt files that contain metadata for the survey and catalog files. Some user input here is required to describe the metadata for the project and the data release.

In [21]:
# Survey description metadata

import csv
from datetime import datetime
now = datetime.now().strftime('%d-%m-%Y')

def dict_to_dc_meta(filename, data):
    """Custom function to write dicts to CSV files following the file format convention
    required for Data Central metadata files.
    
    """
    with open(filename, 'w', newline='') as f:
        csv_writer = csv.DictWriter(f, data.keys(), delimiter='|')
        csv_writer.writeheader()
        csv_writer.writerow(data)
    return

<span style="font-weight: bold; color: #FF0000;">⚠ Update the cell below with the relevant survey metadata for this release</span>

In [22]:
# Survey metadata
survey_meta = {
    'name': 'wallaby',
    'pretty_name': 'WALLABY',
    'title': 'Widefield ASKAP L-band Legacy All-sky Blind surveY',
    'description': 'The Widefield ASKAP L-band Legacy All-sky Blind surveY (or WALLABY) is one of a number of surveys that are now running on the Australian SKA Pathfinder (ASKAP), which is an innovative imaging radio telescope located in an extremely radio quiet zone (the Inyarrimanha Ilgari Bundara, Murchison Radio-astronomy Observatory) in Western Australia. The aim of WALLABY is to use the powerful widefield phased-array technology of ASKAP to observe half of the Southern Hemisphere in the 21-cm line of neutral hydrogen (or HI) at 30-arcsec resolution (with a simultaneous 10-arcsec zoom mode for previously-known galaxies), thereby detecting and imaging the gas distribution in hundreds of thousands of external galaxies in the local Universe. This will allow astronomers to gain a much improved understanding of the processes involved in galaxy formation and evolution, and the role of stellar and black hole feedback, gas accretion and galaxy interactions in these processes. WALLABY has concluded two Pilot Survey phases and has imaged nearly 400 square degree of sky around nearby galaxy clusters with the full ASKAP-36 array, as well as a number of early science fields with smaller numbers of antennas. Full WALLABY started in late-2022.',
    'pi': 'Lister Staveley-Smith, Barbara Catinella',
    'contact': 'lister.staveley-smith@uwa.edu.au',
    'website': 'https://wallaby-survey.org/'
}

# Release meta
release_meta = {
    'name': 'wallaby_pdr2',
    'pretty_name': 'WALLABY Pilot Survey public data release 2',
    'version': 1,
    'data_release_number': 1,
    'contact': 'Tobias Westmeier <tobias.westmeier@uwa.edu.au>',
    'group': 'WALLABY', 
    'public': True
}

# Catalogue metadata
group_meta = {
    'name': 'WALLABY DR2',
    'pretty_name': 'WALLABY DR2',
    'description': 'WALLABY Pilot Survey Public Data Release 2',
    'documentation': '-',
    'contact': 'Tobias Westmeier <tobias.westmeier@uwa.edu.au>',
    'date': now,
    'version': 1
}

coordinate_meta = {
    'table_name': 'detection',
    'source_name_col': 'name',
    'long_col': 'ra',
    'lat_col': 'dec',
    'long_format': 'deg',
    'lat_format': 'deg',
    'frame': 'fk5',
    'equinox': 'J2000'
}

sql_meta = {
    'table_name': 'detection',
    'sql': '"SELECT * FROM detection"'
}

table_meta = {
    'name': 'detection',
    'description': 'WALLABY Pilot Survey Public Data Release 2',
    'group_name': 'WALLABY',
    'filename': '',
    'contact': 'Tobias Westmeier <tobias.westmeier@uwa.edu.au>',
    'date': now,
    'version': 1
}

In [23]:
# Survey metadata

basedir = os.path.join('data_central', 'wallaby')
os.makedirs(basedir, exist_ok=True)
dict_to_dc_meta(os.path.join(basedir, 'wallaby_survey_meta.txt'), survey_meta)

In [24]:
# Release metadata

release_dir = os.path.join(basedir, release_name.lower())
os.makedirs(release_dir, exist_ok=True)
dict_to_dc_meta(os.path.join(release_dir, f'{release_name.lower()}_release_meta.txt'), release_meta)

In [25]:
# Catalogue metadata

catalogues_dir = os.path.join(release_dir, 'catalogues')
os.makedirs(catalogues_dir, exist_ok=True)

catalogues_meta = {'name': None, 'table_name': None, 'description': None, 'ucd': None, 'unit': None, 'data_type': None}
with open(os.path.join(catalogues_dir, f'{release_name.lower()}_column_meta.txt'), 'w', newline='') as f:
    csv_writer = csv.DictWriter(f, catalogues_meta.keys(), delimiter='|')
    csv_writer.writeheader()
    for field in header_votable.get_first_table().iter_fields_and_params():
        csv_writer.writerow({
            'name': field.ID,
            'table_name': field.ID,
            'description': field.description or '',
            'ucd': field.ucd or '',
            'unit': field.unit or '',
            'data_type': field.datatype
        })

dict_to_dc_meta(os.path.join(catalogues_dir, f'{release_name.lower()}_coordinate_meta.txt'), coordinate_meta)
dict_to_dc_meta(os.path.join(catalogues_dir, f'{release_name.lower()}_group_meta.txt'), group_meta)
dict_to_dc_meta(os.path.join(catalogues_dir, f'{release_name.lower()}_sql_meta.txt'), sql_meta)
dict_to_dc_meta(os.path.join(catalogues_dir, f'{release_name.lower()}_table_meta.txt'), table_meta)

In [26]:
# astroobjects metadata

astroobjects_dir = os.path.join(release_dir, 'astroobjects')
os.makedirs(astroobjects_dir, exist_ok=True)

astroobjects_header = {'source_name': None, 'ra': None, 'dec': None}
with open(os.path.join(astroobjects_dir, f'{release_name.lower()}_astroobjects.txt'), 'w', newline='') as f:
    csv_writer = csv.DictWriter(f, astroobjects_header.keys(), delimiter='|')
    csv_writer.writeheader()
    for row in write_table:
        csv_writer.writerow({
            'source_name': row['name'],
            'ra': row['ra'],
            'dec': row['dec']
        })

In [38]:
# Copy files

import shutil

# Create destination
data_central_dir = os.path.join(release_dir, 'data')
os.makedirs(data_central_dir, exist_ok=True)

# Write catalog
ascii.write(write_table, os.path.join(data_central_dir, votable_filename.replace('.xml', '.csv')), format='csv', overwrite=True)

# Copy fits product files
for folder in os.listdir(release_name):
    if folder.endswith('.tar'):
        continue
    folder_path = os.path.join(release_name, folder)
    dest_path = os.path.join(data_central_dir, folder)
    if not os.path.exists(dest_path):
        os.makedirs(dest_path, exist_ok=True)
    files = glob.glob(f'{folder_path}/*fits')
    for f in files:
        shutil.copy(f, dest_path)

### Obscore metadata

There are some FITS headers that are essential for ingestion to Data Central. These include:

* `INSTRUME` keyword: The value field shall contain a character string identifying the instrument used to acquire the data associated with the header.
* `TELESCOP` keyword: The value field shall contain a character string identifying the telescope used to acquire the data associated with the header.
* `OBSERVER` keyword: The value field shall contain a character string identifying who acquired the data associated with the header. (e.g. "ASKAP team" or "John Smith")
* `MJD-OBS`: Date/time of observation. // could be any of the regular keywords that indicate the time of observation (e.g UT, MJD)
* `MJD-STR`: Date/time of which observation started.
* `MJD-END`: Date/time of which observation ended.
* `EXPTIME`: exposure time in seconds

To add these to your data release update the dict in the cell below with the desired fields for these header cards.

In [39]:
# FITS header keywords and values

dc_meta_header = {
    'INSTRUME': 'ASKAP',
    'TELESCOP': 'ASKAP',
    'OBSERVER': 'WALLABY',
    'MJD-OBS': '2019-10-25T19:23:47.900000',
    'MJD-STR': '2019-10-25T19:23:47.900000',
    'MJD-END': '2019-10-26T19:07:47.900000',
    'EXPTIME': '28800',
}

In [40]:
print(data_central_dir)

data_central/wallaby/wallaby_test_pdr/data


In [41]:
# Update product headers

hdu0_keywords = ['SIMPLE', 'BITPIX', 'NAXIS', 'EXTEND', 'ORIGIN', 'EQUINOX', 'LONPOLE', 'LATPOLE', 'SRCVERS', 'SRCTR', 'OBJECT', 'INSTRUME', 'TELESCOP', 'OBSERVER', 'MJD-OBS', 'MJD-STR', 'MJD-END', 'EXPTIME']

for idx, folder in enumerate(os.listdir(data_central_dir)):
    if folder.endswith('.tar') or folder.endswith('.csv'):
        continue
    folder_path = os.path.join(data_central_dir, folder)
    files = glob.glob(f'{folder_path}/*.fits')
    for ff in files:
        with fits.open(ff, mode='update') as hdul:
            header = hdul[0].header
            for key, value in dc_meta_header.items():
                header[key] = value
            if ff.endswith('spec.fits'):
                # Treat spec files differently
                hdr1 = hdul[0].header
                hdr2 = hdul[1].header
                hdr2['RA'] = float(hdr1['CRVAL1'])
                hdr2['DEC'] = float(hdr1['CRVAL2'])
                # Construct new header
                hdr = fits.Header()
                for kw in hdu0_keywords:
                    hdr[kw] = hdr1[kw]
                hdul[0].header = hdr
            hdul.flush()
    print(f'Updated fits headers for files in {folder_path}')

Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100539-282633
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100426-282638
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100336-262923
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100827-270707
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100713-262336
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100351-263707
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100634-295615
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100808-260942
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100707-262300
Updated fits headers for files in data_central/wallaby/wallaby_test_pdr/data/WALLABY J100720-262426


In [42]:
# Copy fits headers from moment 0 map to spectra fits file

header_cards = ['ORIGIN','CUNIT1','CUNIT2','RESTFRQ','SPECSYS','EQUINOX','RADESYS','LONPOLE','LATPOLE','BUNIT','BMAJ','BMIN','BPA','SRCVERS','SRCTR','OBJECT']

for idx, folder in enumerate(os.listdir(data_central_dir)):
    if folder.endswith('.tar') or folder.endswith('.csv'):
        continue
    folder_path = os.path.join(data_central_dir, folder)
    files = glob.glob(f'{folder_path}/*.fits')
    mom0 = [f for f in files if f.endswith('mom0.fits')]
    spec = [f for f in files if f.endswith('spec.fits')]
    if len(mom0) > 1 or len(spec) > 1:
        raise Exception('More than 1 spec/mom0.fits file found')
    ref_header = fits.getheader(mom0[0])
    with fits.open(spec[0], mode='update') as hdul:
        header = hdul[1].header
        for k in header_cards:
            header[k] = ref_header[k]
        for key, value in dc_meta_header.items():
                header[key] = value
        hdul.flush()

---