# Make MLZ input catalog of [1-1], [2-1], and [1-2] groups from fof matching.
Edited by HyeYun Park, adding MLZ catalog making to 'matching_fof.ipynb' <br>
(Match truth and coadd catalogs for DC2 Run 1.1p : Owner: Yao-Yuan Mao, Scott Daniel (with help from Anže Slosar, Bhairav Valera, HyeYun Park)) <br>

**Notes:**
- Follow this [step-by-step guide](https://confluence.slac.stanford.edu/x/Xgg4Dg) if you don't know how to run this notebook.
- If you need more information about the Generic Catalog Reader (GCR), see [this diagram](https://github.com/yymao/generic-catalog-reader/blob/master/README.md#concept) and [more examples](https://github.com/LSSTDESC/gcr-catalogs/blob/master/examples/GCRCatalogs%20Demo.ipynb).

In [3]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
from astropy.coordinates import SkyCoord
import FoFCatalogMatching
import GCRCatalogs

In [None]:
# Display `GCRCatalogs.available_catalogs` as the cell output
# (presumably the catalog names that `GCRCatalogs.load_catalog` accepts -- TODO confirm).
GCRCatalogs.available_catalogs

In [5]:
# load coadd catalog (for a single tract)
# NOTE(review): the catalog name below does not mention a tract; confirm that
# 'dc2_object_run1.2i_all_columns' really is restricted to a single tract.
coadd_cat = GCRCatalogs.load_catalog('dc2_object_run1.2i_all_columns')

In [None]:
# Quick visual check of the coadd catalog footprint.
# With `return_iterator=True`, `get_quantities` hands back an iterator whose
# elements hold the requested columns for one chunk of the dataset at a time.
#
# For coadd catalogs those chunks happen to be patches, so every patch is
# drawn by its own `plt.scatter` call and ends up with its own color.

footprint_chunks = coadd_cat.get_quantities(['ra', 'dec'], return_iterator=True)
for coadd_data in footprint_chunks:
    plt.scatter(coadd_data['ra'], coadd_data['dec'], s=1, rasterized=True);

plt.xlabel('RA');
plt.ylabel('Dec');

In [None]:
# Restrict the matching to a small RA/Dec box so that it does not take too long.
ra_min, ra_max = 55.5, 56.0
dec_min, dec_max = -29.0, -28.5

# Half-open box: lower bounds are inclusive, upper bounds are exclusive.
coord_filters = [
    '{} {} {}'.format(column, comparison, bound)
    for column, comparison, bound in (
        ('ra', '>=', ra_min),
        ('ra', '<', ra_max),
        ('dec', '>=', dec_min),
        ('dec', '<', dec_max),
    )
]

In [None]:
# Truth-catalog selection: reject rows flagged as star, sprinkled, or agn.
from GCR import GCRQuery
star_sprinkled_filter = [~GCRQuery(flag) for flag in ('star', 'sprinkled', 'agn')]

In [None]:
# Lensing cuts based on Mandelbaum 2017 (arxiv 1705.06745)

# Columns whose NaN entries are removed.
_nan_checked_columns = (
    'i_modelfit_CModel_instFlux',
    'ext_shapeHSM_HsmShapeRegauss_resolution',
    'ext_shapeHSM_HsmShapeRegauss_e1',
    'ext_shapeHSM_HsmShapeRegauss_e2',
)

# Boolean flag columns; an object is dropped when any of them is set.
_rejected_flags = (
    'deblend_skipped',
    'base_PixelFlags_flag_edge',
    'base_PixelFlags_flag_interpolatedCenter',
    'base_PixelFlags_flag_saturatedCenter',
    'base_PixelFlags_flag_crCenter',
    'base_PixelFlags_flag_bad',
    'base_PixelFlags_flag_suspectCenter',
    'base_PixelFlags_flag_clipped',
    'ext_shapeHSM_HsmShapeRegauss_flag',
)

lensing_cuts = (
    [~GCRQuery((np.isnan, column)) for column in _nan_checked_columns]
    # signal-to-noise cut, then the primary-detection requirement
    + [GCRQuery('i_SN_cmodel >= 10'), GCRQuery('detect_isPrimary')]
    + [~GCRQuery(flag) for flag in _rejected_flags]
    # cuts on object properties
    + [
        GCRQuery('HSM_res >= 0.3'),
        GCRQuery('HSM_ell < 2.0'),
        GCRQuery('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4'),
        GCRQuery('mag_i_cModel < 24.5'),  # FIXME: doesn't have extinction correction
        GCRQuery('base_Blendedness_abs_instFlux < 10**(-0.375)'),
    ]
)

In [None]:
# Let's also define a magnitude cut: keep rows whose `mag_i` is finite and brighter than 24.5.
# NOTE(review): in the cells visible here this list only appears inside commented-out
# `filters=(coord_filters + mag_filters)` arguments, so the cut is currently not applied.
mag_filters = [
    (np.isfinite, 'mag_i'),  # (callable, column name) form
    'mag_i < 24.5',          # string expression form
]

In [None]:
# Register the derived column `shape_hsm_regauss_etot` = hypot(e1, e2) of the
# HSM regaussianization ellipticity components (total ellipticity).
# It is requested when the coadd columns are loaded but is not written to the MLZ files.
coadd_cat.add_derived_quantity('shape_hsm_regauss_etot', np.hypot, 'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2')

In [None]:
# Register the derived coadd columns (i_SN_cmodel, HSM_res, HSM_ell, psf_size).
# The first three are referenced by the lensing cuts.


def _psf_size(xx, yy, xy):
    # 0.168 * 2.355 * det(moments)**0.25
    # NOTE(review): 0.168 looks like a pixel scale in arcsec/pixel and 2.355 like the
    # sigma-to-FWHM factor -- confirm that this pixel scale applies to this catalog.
    return 0.168*2.355*(xx*yy - xy*xy)**0.25


_coadd_modifiers = (
    # signal-to-noise of the i-band CModel flux
    ('i_SN_cmodel', (np.divide, 'i_modelfit_CModel_instFlux', 'i_modelfit_CModel_instFluxErr')),
    # plain alias
    ('HSM_res', 'ext_shapeHSM_HsmShapeRegauss_resolution'),
    # total ellipticity from the two components
    ('HSM_ell', (np.hypot, 'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2')),
    ('psf_size', (_psf_size, 'i_base_SdssShape_psf_xx', 'i_base_SdssShape_psf_yy', 'i_base_SdssShape_psf_xy')),
)

for _quantity, _modifier in _coadd_modifiers:
    coadd_cat.add_quantity_modifier(_quantity, _modifier, overwrite=True)

In [None]:
# Load positions, object ids, cModel magnitudes, magnitude errors and the total
# ellipticity from the coadd catalog, keeping only objects that pass `lensing_cuts`.
# (The RA/Dec box and the magnitude cut are NOT applied here; pass
# filters=(coord_filters + mag_filters) instead to use them.)
_coadd_bands = 'iugryz'
_coadd_columns = (
    ['ra', 'dec', 'objectId']
    + ['mag_{}_cModel'.format(band) for band in _coadd_bands]
    + ['magerr_{}'.format(band) for band in _coadd_bands]
    + ['shape_hsm_regauss_etot']
)
coadd_data = coadd_cat.get_quantities(_coadd_columns, filters=lensing_cuts)

In [None]:
# Let's now turn to the truth catalog; turn off the md5 sum check to save time
# (done here by overriding the catalog config with {'md5': None}).
truth_cat = GCRCatalogs.load_catalog('dc2_truth_run1.2_static', {'md5': None})

In [None]:
# Inspect what the truth catalog offers: first the quantities it exposes,
# then (after a '---' separator line) its native quantities.
# The next cells rely on this to pick the right magnitude column.

print(
    sorted(truth_cat.list_all_quantities()),
    '---',
    sorted(truth_cat.list_all_native_quantities()),
    sep='\n',
)

In [None]:
# So we see there is no `mag_i`, only `mag_true_i` (i.e., magnitude before lensing).
# Display its quantity modifier to see which native column it maps to (expected: `i`).
truth_cat.get_quantity_modifier('mag_true_i')

In [None]:
# To make our `mag_filters` (which refer to `mag_i`) work on the truth catalog,
# define `mag_i` there as an alias of the native column `i`.
truth_cat.add_quantity_modifier('mag_i', 'i')

In [None]:
# Load positions, ids, classification flags, redshift and true magnitudes from
# the truth catalog, dropping stars, sprinkled objects and AGN.
# To speed up loading one could additionally pass
#   filters=mag_filters, native_filters=(coord_filters + ['i < 24.5'])
# but neither is applied at the moment.
_truth_columns = ['ra', 'dec', 'object_id', 'star', 'sprinkled', 'agn', 'redshift']
_truth_columns += ['mag_true_i', 'g']
_truth_columns += ['mag_true_{}'.format(band) for band in 'gruyz']

truth_data = truth_cat.get_quantities(_truth_columns, filters=star_sprinkled_filter)

# The object_id, star, and sprinkled columns are used when cross-referencing
# truth information with the extragalactic catalog.

In [None]:
# Number of objects left after the cuts (filters): coadd first, then truth.
for _catalog in (coadd_data, truth_data):
    print(len(_catalog['ra']))

In [None]:
# Now we can really do the matching!
# `FoFCatalogMatching.match` takes a dictionary of catalogs to match and a
# friends-of-friends linking length.
# Our "catalogs" are dictionaries of columns rather than astropy tables or pandas
# dataframes, so `len(catalog)` would not give the number of rows; we therefore
# pass `catalog_len_getter` to tell the matcher how to obtain it.


def _catalog_length(catalog):
    """Return the number of rows of a dict-of-columns catalog."""
    return len(catalog['ra'])


results = FoFCatalogMatching.match(
    catalog_dict=dict(truth=truth_data, coadd=coadd_data),
    linking_lengths=1.0,  # presumably in arcsec -- TODO confirm against FoFCatalogMatching docs
    catalog_len_getter=_catalog_length,
)

In [34]:
# The return value of FoFCatalogMatching.match is an astropy table with one row per
# input object: `row_index` (row in the original catalog), `catalog_key`
# ('truth' or 'coadd') and `group_id` (the friends-of-friends group it belongs to).
results

row_index,catalog_key,group_id
int64,str5,int64
0,truth,0
9166,coadd,0
1,truth,1
9843,coadd,1
2,truth,2
10045,coadd,2
3,truth,3
9932,coadd,3
4,truth,4
640,coadd,4


In [None]:
# Count the truth and coadd members *of every group* without looping over groups.

# Split the rows of `results` by the catalog they come from.
truth_mask = results['catalog_key'] == 'truth'
coadd_mask = ~truth_mask

# np.bincount counts how often each integer id occurs (a histogram for integer
# input); `minlength` makes both count arrays cover every group id.
n_groups = results['group_id'].max() + 1
n_truth = np.bincount(results['group_id'][truth_mask], minlength=n_groups)
n_coadd = np.bincount(results['group_id'][coadd_mask], minlength=n_groups)

# 2D histogram of (n_coadd, n_truth): flatten each pair into a single bin number
# (row = n_coadd, column = n_truth), count the bins, and fold back into a square.
n_max = max(n_truth.max(), n_coadd.max()) + 1
_flat_bins = np.ravel_multi_index((n_coadd, n_truth), (n_max, n_max))
hist_2d = np.bincount(_flat_bins, minlength=n_max*n_max).reshape(n_max, n_max)

# log scale (+1 so that empty bins map to 0); only the 0..5 corner is shown
plt.imshow(np.log10(hist_2d+1), extent=(-0.5, n_max-0.5, -0.5, n_max-0.5), origin='lower');
plt.xlabel('Number of truth objects');
plt.ylabel('Number of coadd objects');
plt.xlim(-0.5,5.5)
plt.ylim(-0.5,5.5)
plt.colorbar(label=r'$\log(N_{\rm groups} \, + \, 1)$');

In [None]:
# Groups with a 1-to-1 truth/coadd match.

# ids of the groups that contain exactly one truth and one coadd object ...
_one_to_one_group_ids = np.flatnonzero((n_truth == 1) & (n_coadd == 1))
# ... and the rows of `results` that belong to those groups
one_to_one_group_mask = np.isin(results['group_id'], _one_to_one_group_ids)

# row indices into the *original* truth/coadd catalogs for the 1-to-1 groups
truth_idx = results['row_index'][one_to_one_group_mask & truth_mask]
coadd_idx = results['row_index'][one_to_one_group_mask & coadd_mask]

In [None]:
# Groups with a 2-to-1 truth/coadd match (two truth objects, one coadd object).
# These groups contain possible blended objects.

# ids of the groups that contain exactly two truth objects and one coadd object ...
_two_to_one_group_ids = np.flatnonzero((n_truth == 2) & (n_coadd == 1))
# ... and the rows of `results` that belong to those groups
two_to_one_group_mask = np.isin(results['group_id'], _two_to_one_group_ids)

# row indices into the *original* truth/coadd catalogs for the 2-to-1 groups
truth_idx21 = results['row_index'][two_to_one_group_mask & truth_mask]
coadd_idx21 = results['row_index'][two_to_one_group_mask & coadd_mask]

In [None]:
# Groups with a 1-to-2 truth/coadd match (one truth object, two coadd objects).
# These groups contain mis-detected objects.

# ids of the groups that contain exactly one truth object and two coadd objects ...
_one_to_two_group_ids = np.flatnonzero((n_truth == 1) & (n_coadd == 2))
# ... and the rows of `results` that belong to those groups
one_to_two_group_mask = np.isin(results['group_id'], _one_to_two_group_ids)

# row indices into the *original* truth/coadd catalogs for the 1-to-2 groups
truth_idx12 = results['row_index'][one_to_two_group_mask & truth_mask]
coadd_idx12 = results['row_index'][one_to_two_group_mask & coadd_mask]

In [None]:
# Number of rows selected for each group type, printed in this order:
# [2-1] truth, [2-1] coadd, [1-1] coadd, [1-2] truth, [1-2] coadd.
for _row_indices in (truth_idx21, coadd_idx21, coadd_idx, truth_idx12, coadd_idx12):
    print(len(_row_indices))

In [None]:
# Truth redshift (used as spec-z) and true magnitudes of the [1-1] matched
# objects; these feed the truth-only train and test catalogs for MLZ.
truth_z = truth_data['redshift'][truth_idx]
(truth_mag_i, truth_mag_u, truth_mag_g,
 truth_mag_r, truth_mag_y, truth_mag_z) = (
    truth_data['mag_true_' + band][truth_idx] for band in 'iugryz'
)

In [55]:
# Use the truth catalog's spec-z and magnitudes to generate train and test catalogs for MLZ.
# Each row holds: redshift, u g r i z magnitudes, then the u-g, g-r, r-i, i-z colors.
header = "#redshift u g r i z u-g g-r r-i i-z\n"
row_format = '%.6f' + ' %.5f' * 9 + '\n'

# One table row per [1-1] object, columns in the order announced by the header.
table = np.column_stack([
    truth_z,
    truth_mag_u, truth_mag_g, truth_mag_r, truth_mag_i, truth_mag_z,
    truth_mag_u - truth_mag_g, truth_mag_g - truth_mag_r,
    truth_mag_r - truth_mag_i, truth_mag_i - truth_mag_z,
])

# Shuffle the rows so that the train/test split does not depend on catalog order.
index = np.random.choice(len(truth_z), len(truth_z), replace=False)
half = len(index) // 2

# First half of the shuffled rows -> training file.
# `with` closes the file even if a write fails.
with open("DC2.train", "w") as trainingfile:
    trainingfile.write(header)
    trainingfile.writelines(row_format % tuple(row) for row in table[index[:half]])

# Remaining rows -> test file. The slice starts at `half` (not `half + 1`) so that
# no object is silently dropped from both files.
with open("DC2.test", "w") as testfile:
    testfile.write(header)
    testfile.writelines(row_format % tuple(row) for row in table[index[half:]])

In [22]:
# Inputs for the [1-1] group (one truth object matched to one coadd object, so
# the match is most likely correct): truth redshift as spec-z, plus the coadd
# cModel magnitudes and magnitude errors in every band.
truth_z = truth_data['redshift'][truth_idx]

(coadd_mag_i, coadd_mag_u, coadd_mag_r,
 coadd_mag_g, coadd_mag_y, coadd_mag_z) = (
    coadd_data['mag_{}_cModel'.format(band)][coadd_idx] for band in 'iurgyz'
)

(mag_error_i, mag_error_u, mag_error_r,
 mag_error_g, mag_error_y, mag_error_z) = (
    coadd_data['magerr_' + band][coadd_idx] for band in 'iurgyz'
)


In [31]:
# Use the truth catalog's spec-z and the coadd catalog's magnitudes (+errors) to
# generate train and test catalogs for MLZ.
# This is for the [1-1] group, where the match is most likely correct.
# Each row holds: redshift, u g r i y z magnitudes, u-g g-r r-i i-z colors,
# then the u g r i y z magnitude errors.
header = "#redshift u g r i y z u-g g-r r-i i-z eu eg er ei ey ez\n"
row_format = '%.6f' + ' %.5f' * 10 + ' %.9f' * 6 + '\n'

mags = [coadd_mag_u, coadd_mag_g, coadd_mag_r, coadd_mag_i, coadd_mag_y, coadd_mag_z]
colors = [coadd_mag_u - coadd_mag_g, coadd_mag_g - coadd_mag_r,
          coadd_mag_r - coadd_mag_i, coadd_mag_i - coadd_mag_z]
errors = [mag_error_u, mag_error_g, mag_error_r, mag_error_i, mag_error_y, mag_error_z]

# One table row per object, columns in the order announced by the header.
table = np.column_stack([truth_z] + mags + colors + errors)
# A row is usable when its redshift, magnitudes and errors are all non-NaN.
usable = ~np.isnan(np.column_stack([truth_z] + mags + errors)).any(axis=1)

# Shuffle the rows so that the train/test split does not depend on catalog order.
index = np.random.choice(len(truth_z), len(truth_z), replace=False)
half = len(index) // 2
train_rows = index[:half]
# Starts at `half` (not `half + 1`) so that no object is dropped from both files.
test_rows = index[half:]

# `with` closes each file even if a write fails.
with open("DC2_out_ss_lensingcuts_SN.train", "w") as trainingfile:
    trainingfile.write(header)
    trainingfile.writelines(
        row_format % tuple(row) for row in table[train_rows[usable[train_rows]]]
    )

with open("DC2_out_ss_lensingcuts_SN.test", "w") as testfile:
    testfile.write(header)
    testfile.writelines(
        row_format % tuple(row) for row in table[test_rows[usable[test_rows]]]
    )

NameError: name 'truth_z' is not defined

In [83]:
# Inputs for the [2-1] group: two truth objects matched to one coadd object.
# Only one spec-z can be attached to the coadd object, so we pick the redshift of
# the brighter (or fainter) of the two truth objects.
truth_z21_all = truth_data['redshift'][truth_idx21]
truth_mag_i21_all = truth_data['mag_true_i'][truth_idx21]

# Rows 2*k and 2*k+1 of `truth_idx21` are treated as the two members of the k-th group.
# NOTE(review): this assumes `results` lists the members of a group consecutively
# and in the same group order as `coadd_idx21` -- confirm.
n_pairs21 = len(truth_idx21) // 2
pair_z21 = np.asarray(truth_z21_all)[:2 * n_pairs21].reshape(n_pairs21, 2)
pair_mag21 = np.asarray(truth_mag_i21_all)[:2 * n_pairs21].reshape(n_pairs21, 2)

# Smaller magnitude = brighter. On ties the second member counts as the brighter one.
second_is_brighter21 = pair_mag21[:, 0] >= pair_mag21[:, 1]
brighter_z21 = np.where(second_is_brighter21, pair_z21[:, 1], pair_z21[:, 0])
fainter_z21 = np.where(second_is_brighter21, pair_z21[:, 0], pair_z21[:, 1])

# Truth objects are cut at mag 25: unless BOTH members satisfy mag_true_i <= 25 the
# redshift is set to NaN, which excludes the group when the catalog is written.
both_within_cut21 = (pair_mag21 <= 25).all(axis=1)
brighter_z21 = np.where(both_within_cut21, brighter_z21, np.nan)
fainter_z21 = np.where(both_within_cut21, fainter_z21, np.nan)

truth_z21 = brighter_z21.astype(float)  # use fainter_z21 to choose the fainter truth object

# Coadd cModel magnitudes and magnitude errors of the single coadd object of each group.
coadd_mag_i21 = coadd_data['mag_i_cModel'][coadd_idx21]
coadd_mag_u21 = coadd_data['mag_u_cModel'][coadd_idx21]
coadd_mag_r21 = coadd_data['mag_r_cModel'][coadd_idx21]
coadd_mag_g21 = coadd_data['mag_g_cModel'][coadd_idx21]
coadd_mag_y21 = coadd_data['mag_y_cModel'][coadd_idx21]
coadd_mag_z21 = coadd_data['mag_z_cModel'][coadd_idx21]
mag_error_i21 = coadd_data['magerr_i'][coadd_idx21]
mag_error_u21 = coadd_data['magerr_u'][coadd_idx21]
mag_error_r21 = coadd_data['magerr_r'][coadd_idx21]
mag_error_g21 = coadd_data['magerr_g'][coadd_idx21]
mag_error_y21 = coadd_data['magerr_y'][coadd_idx21]
mag_error_z21 = coadd_data['magerr_z'][coadd_idx21]

[0.07915652        nan        nan ...        nan        nan        nan]
12378


In [37]:
# Use the truth catalog's spec-z and the coadd catalog's magnitudes (+errors) to
# generate train and test catalogs for MLZ.
# This is for the [2-1] group, which means two truth objects, one coadd object.
# Each row holds: redshift, u g r i y z magnitudes, u-g g-r r-i i-z colors,
# then the u g r i y z magnitude errors.
header = "#redshift u g r i y z u-g g-r r-i i-z eu eg er ei ey ez\n"
row_format = '%.6f' + ' %.5f' * 10 + ' %.9f' * 6 + '\n'

mags = [coadd_mag_u21, coadd_mag_g21, coadd_mag_r21,
        coadd_mag_i21, coadd_mag_y21, coadd_mag_z21]
colors = [coadd_mag_u21 - coadd_mag_g21, coadd_mag_g21 - coadd_mag_r21,
          coadd_mag_r21 - coadd_mag_i21, coadd_mag_i21 - coadd_mag_z21]
errors = [mag_error_u21, mag_error_g21, mag_error_r21,
          mag_error_i21, mag_error_y21, mag_error_z21]

# One table row per group, columns in the order announced by the header.
table = np.column_stack([truth_z21] + mags + colors + errors)
# A row is usable when its redshift, magnitudes and errors are all non-NaN
# (a NaN redshift marks a group rejected by the truth magnitude cut).
usable = ~np.isnan(np.column_stack([truth_z21] + mags + errors)).any(axis=1)

# Shuffle the rows so that the train/test split does not depend on catalog order.
index = np.random.choice(len(truth_z21), len(truth_z21), replace=False)
half = len(index) // 2
train_rows = index[:half]
# Starts at `half` (not `half + 1`) so that no group is dropped from both files.
test_rows = index[half:]

# `with` closes each file even if a write fails.
with open("DC2_out_ss_brighter_21.train", "w") as trainingfile:
    trainingfile.write(header)
    trainingfile.writelines(
        row_format % tuple(row) for row in table[train_rows[usable[train_rows]]]
    )

with open("DC2_out_ss_brighter_21.test", "w") as testfile:
    testfile.write(header)
    testfile.writelines(
        row_format % tuple(row) for row in table[test_rows[usable[test_rows]]]
    )

In [82]:
# Inputs for the [1-2] group: one truth object matched to two coadd objects.
# Only one coadd object can be paired with the truth spec-z, so we pick the one
# that is brighter in 'mag_i_cModel'.
truth_mag_i12 = truth_data['mag_true_i'][truth_idx12]
coadd_mag_i12_all = coadd_data['mag_i_cModel'][coadd_idx12]

truth_z12 = truth_data['redshift'][truth_idx12]

# Rows 2*k and 2*k+1 of `coadd_idx12` are treated as the two members of the k-th group.
# NOTE(review): this assumes `results` lists the members of a group consecutively
# and in the same group order as `truth_idx12` -- confirm.
n_pairs12 = len(coadd_idx12) // 2
pair_mag12 = np.asarray(coadd_mag_i12_all)[:2 * n_pairs12].reshape(n_pairs12, 2)

# Truth objects are cut at mag 25: such groups get a NaN i-band magnitude, which
# excludes them when the catalog is written.
truth_too_faint12 = np.asarray(truth_mag_i12)[:n_pairs12] > 25

# Smaller magnitude = brighter. On ties the second member counts as the brighter one.
# Groups removed by the truth cut keep pointing at their first member.
second_is_brighter12 = (pair_mag12[:, 0] >= pair_mag12[:, 1]) & ~truth_too_faint12

# Position (within `coadd_idx12`) of the coadd object selected for each group.
index_b = 2 * np.arange(n_pairs12) + second_is_brighter12.astype(int)

coadd_mag_i12 = np.where(
    truth_too_faint12, np.nan, np.asarray(coadd_mag_i12_all)[index_b]
).astype(float)

print(truth_z12)

# Remaining bands and all magnitude errors of the selected coadd object.
coadd_mag_u12 = coadd_data['mag_u_cModel'][coadd_idx12][index_b]
coadd_mag_r12 = coadd_data['mag_r_cModel'][coadd_idx12][index_b]
coadd_mag_g12 = coadd_data['mag_g_cModel'][coadd_idx12][index_b]
coadd_mag_y12 = coadd_data['mag_y_cModel'][coadd_idx12][index_b]
coadd_mag_z12 = coadd_data['mag_z_cModel'][coadd_idx12][index_b]
mag_error_i12 = coadd_data['magerr_i'][coadd_idx12][index_b]
mag_error_u12 = coadd_data['magerr_u'][coadd_idx12][index_b]
mag_error_r12 = coadd_data['magerr_r'][coadd_idx12][index_b]
mag_error_g12 = coadd_data['magerr_g'][coadd_idx12][index_b]
mag_error_y12 = coadd_data['magerr_y'][coadd_idx12][index_b]
mag_error_z12 = coadd_data['magerr_z'][coadd_idx12][index_b]

[0.08490384 0.07975113 0.10453808 ... 0.99175477 0.98569262 0.99781561]
13554 6777 6777 6777
3037


In [79]:
# Use the truth catalog's spec-z and the coadd catalog's magnitudes (+errors) to
# generate train and test catalogs for MLZ.
# This is for the [1-2] group, which means one truth object, two coadd objects.
# Each row holds: redshift, u g r i y z magnitudes, u-g g-r r-i i-z colors,
# then the u g r i y z magnitude errors.
# NOTE(review): the output file names say "darker"; make sure they match the
# (brighter/fainter) coadd object that was actually selected for each group.
header = "#redshift u g r i y z u-g g-r r-i i-z eu eg er ei ey ez\n"
row_format = '%.6f' + ' %.5f' * 10 + ' %.9f' * 6 + '\n'

mags = [coadd_mag_u12, coadd_mag_g12, coadd_mag_r12,
        coadd_mag_i12, coadd_mag_y12, coadd_mag_z12]
colors = [coadd_mag_u12 - coadd_mag_g12, coadd_mag_g12 - coadd_mag_r12,
          coadd_mag_r12 - coadd_mag_i12, coadd_mag_i12 - coadd_mag_z12]
errors = [mag_error_u12, mag_error_g12, mag_error_r12,
          mag_error_i12, mag_error_y12, mag_error_z12]

# One table row per group, columns in the order announced by the header.
table = np.column_stack([truth_z12] + mags + colors + errors)
# A row is usable when its redshift, magnitudes and errors are all non-NaN
# (a NaN i-band magnitude marks a group rejected by the truth magnitude cut).
usable = ~np.isnan(np.column_stack([truth_z12] + mags + errors)).any(axis=1)

# Shuffle the rows so that the train/test split does not depend on catalog order.
index = np.random.choice(len(truth_z12), len(truth_z12), replace=False)
half = len(index) // 2
train_rows = index[:half]
# Starts at `half` (not `half + 1`) so that no group is dropped from both files.
test_rows = index[half:]

# `with` closes each file even if a write fails.
with open("DC2_out_ss_darker_12.train", "w") as trainingfile:
    trainingfile.write(header)
    trainingfile.writelines(
        row_format % tuple(row) for row in table[train_rows[usable[train_rows]]]
    )

with open("DC2_out_ss_darker_12.test", "w") as testfile:
    testfile.write(header)
    testfile.writelines(
        row_format % tuple(row) for row in table[test_rows[usable[test_rows]]]
    )