In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os

from datetime import date

import numpy as np

import astropy.units as u
from astropy.table import Table, QTable

In [8]:
from caterpillar.lsstpipe import cutout

ImportError: cannot import name 'cutout' from 'caterpillar.lsstpipe' (unknown location)

### Prepare the input catalog

In [2]:
# Path of the input FITS catalog read by the next cell.
# NOTE(review): this is an absolute path on the author's machine; point it to
# a local copy of the catalog before re-running the notebook elsewhere.
cat = '/Users/song/Downloads/broadcut_GAMA09H_cosmos_match_basicinfo.fits'

In [4]:
# Load the catalog into an astropy Table.
tab = Table.read(cat)

# List the column names available in the catalog.
print(tab.colnames)

['name', 'prefix', 'ra', 'dec', 'radius', 'object_id', 'object_id_isnull']


In [5]:
# Display the first row to inspect the column values, dtypes and units.
tab[0]

name,prefix,ra,dec,radius,object_id,object_id_isnull
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,arcsec,Unnamed: 5_level_1,Unnamed: 6_level_1
bytes26,bytes97,float64,float64,float64,int64,bool
cosmos_43153640956908581_y,/tigress/MERIAN/poststamps/cosmos_broad/9812_0_2/43153640956908581/hsc/cosmos_43153640956908581_y,149.482517432927,1.7997843568838428,3.4999708170211936,43153640956908581,False


In [16]:
# Extract the integer object ID from the 'name' column: names look like
# 'cosmos_43153640956908581_y', so the ID is the second '_'-separated field.
id_arr = np.asarray([int(name.split('_')[1]) for name in tab['name']])

# Extract the chunk label from the 'prefix' path: for a path such as
# '/tigress/MERIAN/poststamps/cosmos_broad/9812_0_2/...', element 5 of the
# '/'-split is '9812_0_2'.
# NOTE(review): the index 5 is tied to this exact directory depth; it will pick
# a different component if the prefix root changes.
chunk_arr = np.asarray([p.split('/')[5] for p in tab['prefix']])

# Attach both arrays to the table as new columns (modifies `tab` in place).
tab['id'] = id_arr
tab['chunk'] = chunk_arr

In [90]:
def _get_ra_dec_name(id_arr, ra_arr, dec_arr):
    """Get the object name based on ID and (RA, Dec)."""
    return [
        "{:s}_{:s}_{:s}_{:s}".format(
            str(i), "{:8.4f}".format(ra).strip(), "{:8.4f}".format(dec).strip()
            ) for (i, ra, dec) in zip(id_arr, ra_arr, dec_arr)]

def _get_file_prefix(name_arr, band, prefix):
    """Get the prefix of the output files based on the ID."""
    if prefix is None:
        return ["{:s}_{:s}".format(str(name), band) for name in name_arr]
    else:
        return ["{:s}_{:s}_{:s}".format(prefix, str(name), band) for name in name_arr]

def _get_output_dir(output_dir, chunk_arr, name_arr):
    """Get the directory for the output cutout data."""
    # Check the output directory
    if not os.path.isdir(output_dir):
        raise ValueError("Output directory '{:s}' does not exist".format(output_dir))
    
    return [os.path.join(output_dir, str(chunk), str(name)) for (chunk, name) in zip(chunk_arr, name_arr)]
    
def _get_int_chunk(data, n_chunk):
    """Assign integer chunk ID to the data."""
    if n_chunk > len(data):
        raise ValueError("Too many chunks...")
    if n_chunk <= 0:
        raise ValueError("Chunk number has to be larger than 0...")
    
    chunk_arr = np.ones(len(data), dtype=int)
    if n_chunk == 1:
        return chunk_arr
    
    chunk_size = np.ceil(len(data) / n_chunk).astype(int)

    start, end = 0, chunk_size
    for i in np.arange(n_chunk):
        chunk_arr[start: end] = i + 1
        start, end = end, end + chunk_size 
        end = len(data) if end > len(data) else end
    
    return chunk_arr


def _prepare_input_cat(input_cat, half_size, unit, ra_col, dec_col, band, id_col, chunk,
                       prefix, output_dir, save=True):
    """
    Prepare the input sample for the given dataset.

    The cutouts are organized into:
        [output_dir]/[chunk_id]/[galaxy_id]/[file_name].fits
    And the file name prefix is:
        [prefix]_[galaxy_id]_[band]

    Parameters
    ----------
    input_cat : str or astropy.table.Table
        The catalog, or the path of a file readable by ``Table.read``.
    half_size : str or number
        Name of the column holding the half size of every object, or a single
        value used for all objects.
    unit : str
        Unit attached to the half sizes when they carry none: 'arcsec',
        'arcmin', 'degree' or 'pixel'. With 'pixel' no unit is attached.
    ra_col, dec_col : str
        Names of the R.A. and Dec columns.
    band : str
        Band label appended to the file prefix.
    id_col : str or None
        Name of the ID column. When ``None``, names are built from a running
        index and the coordinates.
    chunk : str, int or None
        Name of a column with chunk labels, or the number of chunks to split
        the catalog into. When ``None`` the chunk level is left out of the
        output paths and the returned table has no 'chunk' column.
    prefix : str or None
        Prefix of the output files; also used in the name of the saved sample.
    output_dir : str
        Existing root directory for the outputs.
    save : bool, optional
        Write the sample to ``[output_dir]/[prefix]-YYYY-MM-DD.fits``,
        overwriting an existing file.

    Returns
    -------
    astropy.table.QTable
        Columns 'name', 'prefix', 'dir', 'chunk' (unless ``chunk`` is None),
        'ra', 'dec' and 'half_size'.

    Raises
    ------
    ValueError
        For an unknown column name, a negative half size, or an unknown unit.
    """
    # Load the input catalog
    if isinstance(input_cat, str):
        input_cat = Table.read(input_cat)

    # Get an array for half size
    if isinstance(half_size, str):
        # Look the column up under the same stripped name that is validated
        size_col = half_size.strip()
        if size_col not in input_cat.colnames:
            raise ValueError("Wrong half size column name. [{:s}]".format(half_size))
        half_size_arr = input_cat[size_col]
    else:
        # Using the same size for all objects
        half_size_arr = np.full(len(input_cat), float(half_size))

    if np.any(half_size_arr < 0):
        raise ValueError("Negative size value.")

    # Add size unit if necessary. A plain ndarray (numeric `half_size`) has no
    # `.unit` attribute at all, hence the getattr().
    if unit != 'pixel' and getattr(half_size_arr, 'unit', None) is None:
        # Check the half size unit
        unit_name = unit.strip()
        if unit_name not in ['arcsec', 'arcmin', 'degree', 'pixel']:
            raise ValueError("Wrong size unit. [arcsec, arcmin, degree, pixel]")
        # One Quantity array instead of a Python list of scalar Quantities
        half_size_arr = np.asarray(half_size_arr, dtype=float) * u.Unit(unit_name)

    # Get the RA and DEC arrays
    if ra_col not in input_cat.colnames:
        raise ValueError("Wrong R.A. column name. [{:s}]".format(ra_col))
    if dec_col not in input_cat.colnames:
        raise ValueError("Wrong Dec column name. [{:s}]".format(dec_col))
    ra_arr, dec_arr = input_cat[ra_col], input_cat[dec_col]

    # Get the object id or name
    if id_col is None:
        name_arr = _get_ra_dec_name(np.arange(len(ra_arr)) + 1, ra_arr, dec_arr)
    else:
        if id_col not in input_cat.colnames:
            raise ValueError("Wrong ID column name. [{:s}]".format(id_col))
        name_arr = input_cat[id_col]

    # Get the output file prefix
    prefix_arr = _get_file_prefix(name_arr, band, prefix)

    # Get the chunk label of every object
    if chunk is None:
        chunk_arr = None
        # os.path.join() ignores an empty middle component, so empty labels
        # yield [output_dir]/[galaxy_id] without a chunk level.
        dir_chunk_arr = [''] * len(input_cat)
    elif isinstance(chunk, str):
        if chunk not in input_cat.colnames:
            raise ValueError("Wrong Chunk column name. [{:s}]".format(chunk))
        chunk_arr = input_cat[chunk]
        dir_chunk_arr = chunk_arr
    else:
        chunk_arr = _get_int_chunk(input_cat, int(chunk))
        dir_chunk_arr = chunk_arr

    # Get the output file directory
    dir_arr = _get_output_dir(output_dir, dir_chunk_arr, name_arr)

    columns = [name_arr, prefix_arr, dir_arr, list(ra_arr), list(dec_arr), half_size_arr]
    names = ['name', 'prefix', 'dir', 'ra', 'dec', 'half_size']
    if chunk_arr is not None:
        # Keep 'chunk' right after 'dir'; a None column cannot go into a table
        columns.insert(3, chunk_arr)
        names.insert(3, 'chunk')

    sample = QTable(columns, names=names)

    if save:
        today = date.today()
        prefix = 'postamps' if prefix is None else prefix
        sample.write(
            os.path.join(output_dir, "{:s}-{:4d}-{:02d}-{:02d}.fits".format(
                prefix, today.year, today.month, today.day)), overwrite=True)

    return sample

In [92]:
# Build (and save) the cutout sample for the N708 band: per-object half sizes
# from the 'radius' column, 20 integer chunks, outputs under the current directory.
_prepare_input_cat(
    input_cat=tab,
    half_size='radius',
    unit='arcsec',
    ra_col='ra',
    dec_col='dec',
    band='N708',
    id_col='id',
    chunk=20,
    prefix='cosmos',
    output_dir='./',
    save=True,
)

name,prefix,dir,chunk,ra,dec,half_size
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,arcsec
int64,str29,str22,int64,float64,float64,float64
43153640956908581,cosmos_43153640956908581_N708,./1/43153640956908581,1,149.482517432927,1.7997843568838428,3.4999708170211936
43153640956912087,cosmos_43153640956912087_N708,./1/43153640956912087,1,149.4615121622147,1.853874214838872,10.247289505034978
43153640956907004,cosmos_43153640956907004_N708,./1/43153640956907004,1,149.49808676780927,1.7751951616501846,5.53583558281855
43153640956913361,cosmos_43153640956913361_N708,./1/43153640956913361,1,149.49421596863644,1.8758752069046896,5.753189594477137
43153636661942697,cosmos_43153636661942697_N708,./1/43153636661942697,1,149.48993681094157,1.6444770735561576,6.943775817233734
43153640956910166,cosmos_43153640956910166_N708,./1/43153640956910166,1,149.47687924272537,1.8268488270143586,4.36454007428045
43153640956911047,cosmos_43153640956911047_N708,./1/43153640956911047,1,149.50178669483344,1.8409192119335216,3.578424428432156
43153640956907529,cosmos_43153640956907529_N708,./1/43153640956907529,1,149.4800255085128,1.7829055839738746,5.34441519719417
43153640956907530,cosmos_43153640956907530_N708,./1/43153640956907530,1,149.4797928881101,1.783289613243299,5.23039506442869
43153640956907838,cosmos_43153640956907838_N708,./1/43153640956907838,1,149.48906587340431,1.7889717631119306,3.115662810703366
