### Sample: one day of data

In [5]:
from astropy.io import fits
import os
import pandas as pd
import glob
import numpy as np
from astropy import units as u
#import multiprocessing

In [7]:
# The Explore_Bintable notebook has a more detailed demonstration of this function.
def queryCatalina(filename):
    """Load one Catalina detection bintable into a flat observations table.

    Opens the FITS file, reads the binary table stored in HDU 1, and joins
    every table row with the per-exposure metadata kept in that HDU's header.

    Parameters
    ----------
    filename : str
        Path to the FITS / bintable file. Its base name without directory and
        extension is used as ``exposure_id``.

    Returns
    -------
    pandas.DataFrame
        One row per table row, with columns ``mjd``, ``ra``, ``dec``,
        ``ra_sigma``, ``dec_sigma``, ``mag``, ``exposure_mjd_mid``,
        ``exposure_duration``, ``mag_sigma``, ``exposure_mjd_start``,
        ``exposure_id``, ``obs_id``, ``observatory_code`` and ``filter``.

    Raises
    ------
    KeyError
        If a required header keyword or table column is missing.
    """
    # The identifier must come from the file being parsed (the parameter),
    # not from a notebook-level global. Directory and extension are dropped.
    exposure_id = os.path.splitext(os.path.basename(filename))[0]

    # The context manager closes the file even if parsing fails; everything
    # that touches the (possibly memory-mapped) table data stays inside it.
    with fits.open(filename) as hdul:
        header = hdul[1].header
        data = hdul[1].data

        # Reference table: one row per exposure. The exposure count is taken
        # from NEXPOSE so it agrees with the modulus used for the offsets
        # below; the header keywords are numbered from 1.
        n_exposures = int(header["NEXPOSE"])
        exposure_numbers = range(1, n_exposures + 1)
        refer_table = pd.DataFrame({
            'detection_offset': list(range(n_exposures)),
            'exposure_mjd_mid': [header[f"MJDMID{k}"] for k in exposure_numbers],
            'exposure_duration': [header[f"EXPOSE{k}"] for k in exposure_numbers],
            'mag_sigma': [header[f"VPHDEV_{k}"] for k in exposure_numbers],
        })
        # Start = mid-point minus half the duration; the division by
        # (3600*24) treats the duration as seconds and converts it to days.
        refer_table['exposure_mjd_start'] = (
            refer_table['exposure_mjd_mid']
            - refer_table['exposure_duration'] / (3600 * 24) / 2
        )

        # SIGMA is treated as arcseconds and applied to both coordinates.
        arcsec_to_deg = u.arcsec.to(u.deg)

        # Observation table: one row per table row.
        obs_table = pd.DataFrame({
            'obs_index': data["DETS_INDEX"],
            # DETS_INDEX is shifted to 0-based and wrapped by the exposure
            # count to select the exposure this row belongs to.
            'detection_offset': (data["DETS_INDEX"] - 1) % (header["NEXPOSE"]),
            # 2400000.5 is the JD -> MJD offset (assumes TIMESTAMP is a JD).
            'mjd': data["TIMESTAMP"] - 2400000.5,
            'ra': data["RA"],
            'dec': data["Dec"],
            'ra_sigma': data["SIGMA"] * arcsec_to_deg,
            'dec_sigma': data["SIGMA"] * arcsec_to_deg,
            'mag': data["MAGNITUDE"],
        })  # 'mag_sigma' : data["DELTAMU"]
        obs_table = obs_table.astype({"obs_index": str}, errors='raise')
        obs_table[["ra", "dec"]] = obs_table[["ra", "dec"]].apply(pd.to_numeric)

        # Attach the exposure metadata to every observation row.
        # A left join keeps rows without a matching exposure (probably will
        # change to inner in future).
        final_table = pd.merge(
            obs_table, refer_table, on='detection_offset', how='left'
        )

        # Identifier columns.
        final_table['exposure_id'] = exposure_id
        final_table = final_table.astype({"detection_offset": str}, errors='raise')
        final_table['obs_id'] = (
            final_table['exposure_id']
            + "_" + final_table['obs_index']
            + "_" + final_table['detection_offset']
        )
        final_table['observatory_code'] = header["MPCCODE"]
        final_table['filter'] = header["FILTER"]

    # The helper columns were only needed to build the join and obs_id.
    final_table = final_table.drop(columns=['obs_index', 'detection_offset'])

    # Force the identifier-like columns to plain strings.
    final_table = final_table.astype(
        {
            "exposure_id": str,
            "obs_id": str,
            "observatory_code": str,
            "filter": str,
        },
        errors='raise',
    )

    return final_table
    

In [13]:
# Directory that receives the combined CSV.
target_path = "/epyc/projects/adam_datasets/Catalina/data/"

# All .detf files in the 20230503 directory (relative to the working
# directory), sorted so the row order of the output is deterministic.
f = sorted(glob.glob("20230503/*.detf"))

# Parse every file into its own DataFrame.
dfs = [queryCatalina(detf_path) for detf_path in f]

# Stack the per-file tables under one fresh 0..N-1 index and write them out
# without the index column.
obs = pd.concat(dfs, ignore_index=True)
obs.to_csv(os.path.join(target_path, "may.csv"), index=False)