In [1]:
import sys
from datetime import datetime
from glob import glob
from os import pardir, curdir, mkdir, makedirs
from os.path import join, abspath
from time import time

import numpy as np
import vaex
from astroquery.utils.tap.core import Tap

In [2]:
# Prepend the parent of the current working directory to the module search
# path so that the project's `utils` module (presumably located there) can be
# imported below.
util_dir = abspath(pardir)
sys.path[:0] = [util_dir]

from utils import progressbar, timeout

In [3]:
# Resolve the output directory <parent of cwd>/Data/<name> and create it when
# it does not exist yet. The last expression displays the resolved path.
name = "TWOMASS-15.8"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
try:
  # makedirs also creates a missing "Data" root directory, where a plain
  # mkdir would fail with FileNotFoundError. It still raises FileExistsError
  # when the target directory itself is already present.
  makedirs(data_dir)
  print(f"Creating {name} dir in Data dir")
except FileExistsError:
  print("Directory already exist. Good to go!")
data_dir

Directory already exist. Good to go!


'/home2/s20321005/Thesis-Project/Data/TWOMASS-15.8'

In [4]:
# TAP client used for the 2MASS queries (IRSA endpoint).
tap_tmass = Tap(url="https://irsa.ipac.caltech.edu/TAP/sync")

# (catalogue column, local column name) pairs. The first element of each pair
# goes into the SELECT list of the query; the second becomes the column name
# of the downloaded table. Keeping them paired prevents the two lists from
# getting out of sync.
_tmass_column_pairs = [
    ("ra", "ra"),
    ("dec", "dec"),
    ("j_m", "Jmag"),
    ("h_m", "Hmag"),
    ("k_m", "Kmag"),
]
columns_tmass = [catalogue_col for catalogue_col, _local_col in _tmass_column_pairs]
columns_tmass_names = [local_col for _catalogue_col, local_col in _tmass_column_pairs]

In [8]:
# Right-ascension band edges: every 3 units from 276 up to and including 360.
# Adjust start/stop/step to change how the sky is split into RA bands.
ras = np.arange(276, 361, 3, dtype=int)
ras

array([276, 279, 282, 285, 288, 291, 294, 297, 300, 303, 306, 309, 312,
       315, 318, 321, 324, 327, 330, 333, 336, 339, 342, 345, 348, 351,
       354, 357, 360])

In [11]:
# Lower (Ra0) and upper (Ra1) right-ascension bounds of the bands to download.
# The download loop pairs them element-wise, so both arrays must have the
# same length.
Ra0, Ra1 = np.array([273]), np.array([276])

In [6]:
# Declination slice edges from -90 to 90 in steps of 10
# (19 edges, i.e. 18 consecutive slices).
des = np.arange(-90, 91, 10, dtype=int)
des

array([-90, -80, -70, -60, -50, -40, -30, -20, -10,   0,  10,  20,  30,
        40,  50,  60,  70,  80,  90])

In [12]:
TOP = 2_000_000 # cap maximum rows for each response, so that the response is not exploding
J_MAG_LIMIT = 15.8  # upper limit applied to j_m in the query below

# Download 2MASS sources with J < J_MAG_LIMIT, one RA band at a time.
# ra0 / ra1 are the lower / upper RA boundaries of a band, dec0 / dec1 the
# boundaries of one declination slice inside that band. Each RA band is
# accumulated in memory and exported to its own HDF5 file in data_dir.
for i, (ra0, ra1) in enumerate(zip(Ra0, Ra1)):
    df_com = None  # accumulated table for this RA band (None until first slice)
    time0 = time()
    print(f"{i} downloading... {ra0}-{ra1}")
    progressbar(0)
    n_slices = len(des) - 1
    j = 0
    while j < n_slices:
        dec0 = des[j]
        dec1 = des[j+1]
        # Use half-open intervals [lower, upper) so that a source lying exactly
        # on a slice edge is fetched once instead of by both neighbouring
        # slices (BETWEEN is inclusive on both ends). Only the last declination
        # slice keeps its upper edge so the final boundary value is not lost.
        dec_upper_op = "<=" if j == n_slices - 1 else "<"
        t0 = time()
        # ph_qual LIKE 'A_A' is the single-pattern form of
        # (LIKE 'A__' AND LIKE '__A'): first and third quality flag equal 'A'.
        query_tmass = f"""
        SELECT TOP {TOP} {", ".join(columns_tmass)}
        FROM fp_psc
        WHERE ra >= {ra0} AND ra < {ra1}
        AND dec >= {dec0} AND dec {dec_upper_op} {dec1}
        AND ph_qual LIKE 'A_A'
        AND j_m < {J_MAG_LIMIT}
        """
        job_tmass = timeout(tap_tmass.launch_job, args=(query_tmass,), timeout_duration=300)
        if job_tmass is None:
            # No job object came back (presumably the call failed or exceeded
            # timeout_duration). j is intentionally left unchanged so the same
            # slice is requested again; there is no retry limit.
            print("fail to fetch tmass")
            print("length = ", 0 if df_com is None else len(df_com))
            continue
        result_tmass = job_tmass.get_results()
        progressbar((j+1)/n_slices*100)
        df_tmass = result_tmass.to_pandas()
        df_tmass.columns = columns_tmass_names
        df_tmass = vaex.from_pandas(df_tmass)
        # Start from the first non-empty slice, then append subsequent slices.
        if df_com is None or len(df_com) == 0:
            df_com = df_tmass
        else:
            df_com = df_com.concat(df_tmass)
        j += 1
        t1 = time()
        print(f" {len(df_tmass)} || {round((t1-t0), 2)}s")
        if len(df_tmass) >= TOP:
            # Hitting the row cap means the server most likely cut the result
            # short; make that visible instead of silently losing sources.
            print(f" WARNING: dec {dec0}..{dec1} returned {len(df_tmass)} rows (TOP={TOP}); result is probably truncated")
    time1 = time()
    if df_com is None:
        # Only reachable when there are no declination slices at all.
        print(f"{i} nothing downloaded for {ra0}-{ra1}, skipping export")
        continue
    df_com.export(join(data_dir, f"tmass-{ra0:03d}-{ra1:03d}.hdf5"), progress=True)
    print(f"{len(df_com)} || {round((time1-time0)/60, 2)}m")
    print(f"{i} saved {ra0}-{ra1} || {datetime.now()}")

0 downloading... 249-252
[##                                                ]6% 7121 || 44.05s
[#####                                             ]11% 39242 || 105.4s
[########                                          ]17% 121831 || 66.27s
[###########                                       ]22% 519042 || 66.35s
[#############                                     ]28% 1051779 || 172.81s
[################                                  ]33% 556206 || 77.1s
[###################                               ]39% 234197 || 62.82s
[######################                            ]44% 116550 || 60.35s
[#########################                         ]50% 80115 || 47.68s
[###########################                       ]56% 61667 || 56.56s
[##############################                    ]61% 43810 || 55.33s
[#################################                 ]67% 35168 || 45.65s
[####################################              ]72% 28521 || 25.35s
[#################################

## Preview

In [None]:
# List every HDF5 file in the data directory. glob() returns paths in
# arbitrary order, so sort them to get the same file order on every run.
files = sorted(glob(join(data_dir, "*.hdf5")))
files

In [None]:
# Open all listed files with vaex and display the result as a preview.
tmass_preview = vaex.open_many(files)
tmass_preview