# Data discovery notebook for Diff image analysis

If it's actually needed, we will set up this notebook to find datasets that are suitable for constructing difference images. For example, given a repository, construct a catalog of calexps and their corresponding coadds that could be used as diff image templates.

In [None]:
import os
import pandas as pd
import pyarrow.parquet as pa

# Root path of the Butler data repository explored in this notebook.
REPO = '/project/mrawls/hits2015/rerun/coaddtest1/'
from lsst.daf.persistence import Butler
# Butler instance built from REPO; the following cells use it for every repository query.
butler = Butler(REPO)

In [None]:
# Ask the butler for the (visit, ccd, filter) values registered for the
# 'src' dataset type; each returned entry is later indexed in that order.
src_id_keys = ['visit', 'ccd', 'filter']
metadata = butler.queryMetadata('src', src_id_keys)

In [None]:
# Build a dataId from the first metadata entry and check that its 'src'
# dataset is really there.
first_entry = metadata[0]
dataId = dict(
    visit=int(first_entry[0]),
    ccd=int(first_entry[1]),
    filter=first_entry[2],
)
# Return True if the Dataset is actually present in the Datastore.
butler.datasetExists('src', dataId=dataId)


In [None]:
# Preview the source catalog for one hand-picked dataId (this dataId exists).
dataId = dict(visit=410915, ccd=33, filter='g')
# Return True if the Dataset is actually present in the Datastore
# (the result is not used further in this cell).
butler.datasetExists('src', dataId=dataId)
# Fetch the source catalog and convert it to a pandas DataFrame via astropy.
srcCatalog = (
    butler.get('src', dataId=dataId)
    .asAstropy()
    .to_pandas()
)
srcCatalog.head()

In [None]:
# Get the source catalog for the currently bound dataId, kept as the object
# returned by the butler (no astropy/pandas conversion is applied here).
src_catalog = butler.get('src', dataId=dataId)

In [None]:
# Build one summary row per 'src' dataset that is actually present:
# its identifiers plus the RA/Dec extent of its source catalog.
rowData = []
for dataset in metadata:
    # Entries are indexed as (visit, ccd, filter).
    visit = int(dataset[0])
    ccd = int(dataset[1])
    filter_name = dataset[2]
    dataId = {'visit': visit, 'ccd': ccd, 'filter': filter_name}

    # Skip entries whose 'src' dataset is not actually present.
    if not butler.datasetExists('src', dataId=dataId):
        continue

    # Source catalog as a pandas DataFrame (via astropy).
    srcCatalog = butler.get('src', dataId=dataId).asAstropy().to_pandas()

    raMin = srcCatalog['coord_ra'].min()
    raMax = srcCatalog['coord_ra'].max()
    decMin = srcCatalog['coord_dec'].min()
    decMax = srcCatalog['coord_dec'].max()

    # Centre is the midpoint of the extremes.
    # NOTE(review): no RA wrap-around handling -- confirm this is acceptable
    # for the fields in this repository.
    raCenter = 0.5 * (raMax + raMin)
    decCenter = 0.5 * (decMax + decMin)

    # The first column must hold the visit id; previously the ccd id was
    # written into both the 'visit' and 'ccd' columns.
    rowData.append([visit, ccd, filter_name, raCenter, decCenter,
                    raMin, raMax, decMin, decMax])

# The variable name (including its spelling) is kept because the parquet
# export cell refers to it.
df_valid_visists = pd.DataFrame(
    rowData,
    columns=['visit', 'ccd', 'DECAM_filter', 'ra_center', 'dec_center',
             'ra_min', 'ra_max', 'dec_min', 'dec_max'],
)

In [None]:
# Write the summary table to a gzip-compressed parquet file.
parquet_save_path = '/home/mrabus/notebooks/Stack_club_project/StackClubCourse/Projects/DIA'
parquet_file = os.path.join(parquet_save_path, 'df.parquet.gzip')
df_valid_visists.to_parquet(parquet_file, compression='gzip')