In [1]:
import pyvo as vo
import pandas as pd
import numpy as np
import os
from astropy.time import Time
from astropy import units as u
from tqdm import tqdm, trange
from math import ceil
from astropy.time import TimeDelta
import time

In [2]:
def querySkymapper(start, end, newStart,
                   data_dir="/epyc/projects/adam_datasets/skyMapper_dr2/data"):
    """Fetch SkyMapper DR2 detections for one date window and append them to HDF5.

    The window is first resolved to a range of image ids via ``dr2.images``.
    The joined images/photometry/master rows for that id range are then pulled
    in chunks, reformatted, and appended to
    ``<data_dir>/dr2_observations_<ISO date of start>.h5`` under key ``data``.

    Parameters
    ----------
    start, end : number
        Bounds of the half-open window ``start <= "date" < end``. ``start`` is
        interpreted as an MJD when building the output file name.
    newStart : int
        Lower bound on ``image_id``; it is used instead of the window's minimum
        image id when it is larger. Pass ``-1`` to start from the first image.
    data_dir : str, optional
        Directory that receives the HDF5 file. Defaults to the original
        hard-coded location.

    Returns
    -------
    None
        Also returned early when the window has no images or a chunk is empty.

    Raises
    ------
    RuntimeError
        If a truncated chunk holds rows of a single image only, so the paging
        cursor cannot advance.

    Notes
    -----
    The file is opened with ``append=True``: running the same window twice
    appends the rows twice.
    """
    # Row count at which a result is treated as truncated by the service.
    ROW_CAP = 1000000

    tap_service = vo.dal.TAPService("https://api.skymapper.nci.org.au/public/tap/")

    regionQuery = f"""
    SELECT COUNT(image_id) AS num, MAX(image_id) AS max_id,MIN(image_id) AS min_id FROM dr2.images WHERE "date" >= {start} AND "date" < {end}
    """
    region = pd.DataFrame(tap_service.search(regionQuery))

    if region['num'][0] == 0:
        return None

    minID = region['min_id'][0]
    maxID = region['max_id'][0]
    if newStart > minID:
        minID = newStart

    # Translate the start MJD into an ISO calendar date for the file name.
    date = Time(start, format='mjd').to_value('iso', subfmt='date')
    file_name = os.path.join(data_dir, f"dr2_observations_{date}.h5")

    # One second expressed in days, derived instead of hard-coded.
    to_mjd = TimeDelta(1, format='sec').to_value('jd')
    mas_to_deg = u.mas.to(u.deg)

    # Page through the id range iteratively. The window bounds never change,
    # so the COUNT/MIN/MAX query above runs once rather than once per chunk.
    while True:
        # ORDER BY is what makes the paging below sound: without it the row
        # order is unspecified and the last row of a truncated result is not
        # guaranteed to carry the highest image id fetched so far.
        query = f"""SELECT
    a.object_id as obs_id,
    b.image_id as exposure_id,
    c."date" as mjd_utc,
    b.ra_img as ra,
    b.decl_img as dec,
    a.e_raj2000 as ra_sigma,
    a.e_dej2000 as dec_sigma,
    b.filter as filter,
    b.mag_psf as mag,
    b.e_mag_psf as mag_sigma,
    '413'as obscode,
    c.exp_time as exposure_time

    FROM
    (Select image_id, "date", exp_time, filter, object from dr2.images) as c
    INNER JOIN
    (select object_id, class_star,image_id,filter,ra_img,decl_img, mag_psf, e_mag_psf from dr2.photometry)as b
    ON b.image_id = c.image_id
    INNER JOIN
    (select object_id, raj2000,dej2000,e_raj2000,e_dej2000, ngood from dr2.master) as a
    on a.object_id = b.object_id

    WHERE c.image_id >= {minID}
    AND  c.image_id <= {maxID}
    ORDER BY exposure_id"""

        tap_results = tap_service.search(query)
        if len(tap_results) == 0:
            return None

        df = pd.DataFrame(tap_results)

        over = (df.shape[0] == ROW_CAP)
        if over:
            # The last image in a truncated chunk may be incomplete: drop it
            # here and restart the next chunk from that image id.
            resume_id = df['exposure_id'].iloc[-1]
            df = df[df['exposure_id'] < resume_id]
            if df.empty:
                # Every row belongs to one image, so the next query would be
                # identical to this one and the loop would never finish.
                raise RuntimeError(
                    f"image {resume_id} alone fills the {ROW_CAP}-row limit; "
                    "cannot page past it"
                )

        # Reformat for output. assign() builds a new frame, so nothing is
        # written into a filtered view of the previous one.
        df = (
            df.assign(
                # Overrides the '413' placeholder selected by the query.
                obscode="Q55",
                # Position uncertainties are converted from mas to degrees.
                dec_sigma=df['dec_sigma'] * mas_to_deg,
                ra_sigma=df['ra_sigma'] * mas_to_deg,
                # Shift the timestamp by half the exposure time (mid-point).
                mjd_utc=df['exposure_time'] * to_mjd / 2 + df['mjd_utc'],
            )
            .rename(columns={"obscode": "observatory_code"})
            .astype(
                {"exposure_id": str, "observatory_code": str, "obs_id": str},
                errors='raise',
            )
            .drop(columns=['exposure_time'])
        )

        df.to_hdf(path_or_buf=file_name, index=False, append=True, key='data', format='table')

        if not over:
            return None

        # Truncated: continue from the image that was dropped above.
        minID = resume_id
    

In [None]:
# Walk the range one day at a time: each call covers the half-open window
# [mjd_day, mjd_day + 1) and passes -1 so no extra image-id lower bound applies.
mjd_days = trange(56763, 58192)
for mjd_day in mjd_days:
    querySkymapper(mjd_day, mjd_day + 1, -1)

  0%|          | 0/1429 [00:00<?, ?it/s]