# Data discovery notebook for Diff image analysis

If its actually needed, we will set up this notebook to find datasets that are suitable for constructing difference images.  e.g., when given a repository, construct a catalog of calexps and their corresponding coadds that could be used as a diff image template.

In [1]:
import os
import pandas as pd
import pyarrow.parquet as pa

REPO = '/project/mrawls/hits2015/rerun/coaddtest1/'  
from lsst.daf.persistence import Butler
butler = Butler(REPO)



In [2]:
metadata = butler.queryMetadata('src',['visit','ccd','filter'])

In [3]:
dataId={'visit': int(metadata[0][0]), 'ccd':int(metadata[0][1]), 'filter':metadata[0][2]}
butler.datasetExists('src', dataId=dataId) #Return True if the Dataset is actually present in the Datastore.


False

In [4]:
dataId={'visit': 410915, 'ccd': 33, 'filter': 'g'} #This dataId exists
butler.datasetExists('src', dataId=dataId) #Return True if the Dataset is actually present in the Datastore.
srcCatalog = butler.get('src', dataId=dataId).asAstropy().to_pandas() # get the source catalog
srcCatalog.head()

Unnamed: 0,id,coord_ra,coord_dec,parent,calib_detected,calib_psf_candidate,calib_psf_used,calib_psf_reserved,deblend_nChild,deblend_deblendedAsPsf,...,base_FootprintArea_value,calib_astrometry_used,calib_photometry_used,calib_photometry_reserved,base_localPhotoCalib,base_localPhotoCalibErr,base_CDMatrix_1_1,base_CDMatrix_1_2,base_CDMatrix_2_1,base_CDMatrix_2_2
0,176486790377504769,2.606623,0.037624,0,False,False,False,False,5,False,...,820,False,False,False,4.227711,0.000852,-1.720409e-08,7.3e-05,-7.3e-05,-5.749108e-08
1,176486790377504770,2.606622,0.038223,0,False,False,False,False,0,False,...,153,False,False,False,4.227711,0.000852,-2.778872e-08,7.3e-05,-7.3e-05,-4.675232e-08
2,176486790377504771,2.606621,0.038303,0,False,False,False,False,0,False,...,114,False,False,False,4.227711,0.000852,-2.919633e-08,7.3e-05,-7.3e-05,-4.53239e-08
3,176486790377504772,2.606622,0.038246,0,False,False,False,False,4,False,...,413,False,False,False,4.227711,0.000852,-2.8185e-08,7.3e-05,-7.3e-05,-4.635117e-08
4,176486790377504773,2.606622,0.038147,0,False,False,False,False,2,False,...,265,False,False,False,4.227711,0.000852,-2.644821e-08,7.3e-05,-7.3e-05,-4.811288e-08


In [5]:
src_catalog = butler.get('src', dataId=dataId) # get the source catalog

In [6]:
rowData = []
for dataset in metadata:
    dataId = {'visit': int(dataset[0]), 'ccd': int(dataset[1]), 'filter':dataset[2]}
    if butler.datasetExists('src', dataId=dataId):
        srcCatalog = butler.get('src', dataId=dataId).asAstropy().to_pandas() # get the source catalog
        raMax = srcCatalog['coord_ra'].max()
        raMin = srcCatalog['coord_ra'].min()
        decMax = srcCatalog['coord_dec'].max()
        decMin = srcCatalog['coord_dec'].min()
        raCenter = 0.5*(raMax + raMin)
        decCenter = 0.5*(decMax + decMin)
        rowData.append([int(dataset[1]), int(dataset[1]), dataset[2], raCenter, decCenter, 
                  raMin, raMax, decMin, decMax])
        
df_valid_visists = pd.DataFrame(rowData, columns=['visit', 'ccd', 'DECAM_filter', 'ra_center', 'dec_center', 
                                        'ra_min', 'ra_max', 'dec_min', 'dec_max'])        

In [7]:
df_valid_visists

Unnamed: 0,visit,ccd,DECAM_filter,ra_center,dec_center,ra_min,ra_max,dec_min,dec_max
0,33,33,g,2.609209,0.037195,2.606621,2.611797,0.035915,0.038474
1,34,34,g,2.614635,0.037193,2.612038,2.617231,0.035915,0.038471
2,35,35,g,2.620068,0.037188,2.617470,2.622666,0.035908,0.038468
3,36,36,g,2.625502,0.037184,2.622905,2.628098,0.035907,0.038462
4,37,37,g,2.630928,0.037183,2.628339,2.633517,0.035906,0.038460
...,...,...,...,...,...,...,...,...,...
331,40,40,g,2.611967,0.034311,2.609374,2.614559,0.033034,0.035588
332,41,41,g,2.617397,0.034309,2.614800,2.619993,0.033031,0.035586
333,42,42,g,2.622834,0.034303,2.620237,2.625431,0.033026,0.035580
334,43,43,g,2.628261,0.034302,2.625670,2.630852,0.033026,0.035579
