In [1]:
from astroquery.utils.tap.core import Tap
import vaex
from datetime import datetime
from time import time
from glob import glob
import sys
from os.path import join, abspath
from os import pardir, curdir, mkdir
import numpy as np

In [2]:
# import utils
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

from utils import timeout, progressbar

In [4]:
name = "TWOMASS-TGAS"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
try:
  mkdir(data_dir)
  print(f"Creating {name} dir in Data dir")
except FileExistsError:
  print("Directory already exist. Good to go!")
data_dir

Creating TWOMASS-TGAS dir in Data dir


'/home2/s20321005/Thesis-Project/Data/TWOMASS-TGAS'

In [9]:
# 2MASS,
tap_tmass = Tap(url="https://irsa.ipac.caltech.edu/TAP/sync")

columns_tmass = ["ra", "dec","j_m", "h_m", "k_m"]
columns_tmass_names = ["ra", "dec", "Jmag", "Hmag", "Kmag"]

In [50]:
# divide into 360 RAs, depend on preference
ras = np.arange(260,361, 10).astype(int)
ras

array([260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360])

In [48]:
des = np.linspace(-90, 90, 19)
des

array([-90., -80., -70., -60., -50., -40., -30., -20., -10.,   0.,  10.,
        20.,  30.,  40.,  50.,  60.,  70.,  80.,  90.])

In [7]:
TOP = 500_000 # cap maximum rows for each response, so that the response is not exploding

# ra0 for lower boundry and ra1 for upper boundary
# same with dec0 and dec1
# download 2MASS with J < 13.5
for i, (ra0, ra1) in enumerate(zip(ras[:-1], ras[1:])):
    if i > 6: break
    df_com = [] #initial table
    time0 = time() 
    print(f"{i} downloading... {ra0}-{ra1}")
    progressbar(0)
    j = 0
    while j < len(des) - 1:
        dec0 = des[j]
        dec1 = des[j+1]
        
        query_tmass = f"""
        SELECT TOP {TOP} {", ".join(columns_tmass)} 
        FROM fp_psc
        WHERE ra BETWEEN {ra0} AND {ra1}
        AND dec BETWEEN {dec0} AND {dec1}
        AND j_m < 13.5
        """
        job_tmass = timeout(tap_tmass.launch_job, args=(query_tmass,), timeout_duration=180)
        if job_tmass == None: 
            print(job_tmass)
            print("fail to fetch tmass")
            print("length = ", len(df_com))
            continue
        result_tmass = job_tmass.get_results()
        progressbar((j+1)/(len(des)-1)*100)
        df_tmass = result_tmass.to_pandas()
        df_tmass.columns = columns_tmass_names
        df_tmass = vaex.from_pandas(df_tmass)
        if len(df_com) == 0:
            df_com = df_tmass
        else:
            df_com = df_com.concat(df_tmass)
        j += 1
        # break
    time1 = time()  
    df_com.export(join(data_dir, f"tmass-{ra0:03d}-{ra1:03d}.hdf5"), progress=True)
    print(f"{len(df_com)} || {round((time1-time0)/60, 2)}m")
    print(f"{i} saved {ra0}-{ra1} || {datetime.now()}")

0 downloading... 0-10
export(hdf5) [########################################] 100.00% elapsed time  :     0.07s =  0.0m =  0.0h
 528108 || 9.7m
0 saved 0-10 || 2022-07-17 02:46:43.014601
1 downloading... 10-20
export(hdf5) [########################################] 100.00% elapsed time  :     0.06s =  0.0m =  0.0h
 507529 || 12.32m
1 saved 10-20 || 2022-07-17 02:59:02.106177
2 downloading... 20-30
export(hdf5) [########################################] 100.00% elapsed time  :     0.06s =  0.0m =  0.0h
 501555 || 10.52m
2 saved 20-30 || 2022-07-17 03:09:33.201714
3 downloading... 30-40
export(hdf5) [########################################] 100.00% elapsed time  :     0.05s =  0.0m =  0.0h
 515716 || 11.48m
3 saved 30-40 || 2022-07-17 03:21:02.164384
4 downloading... 40-50
export(hdf5) [########################################] 100.00% elapsed time  :     0.07s =  0.0m =  0.0h
 524734 || 11.52m
4 saved 40-50 || 2022-07-17 03:32:33.230023
5 downloading... 50-60
export(hdf5) [############

## Preview

In [5]:
files = glob(join(data_dir, "*.hdf5"))
files

['/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-070-080.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-060-070.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-010-020.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-320-330.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-200-210.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-340-350.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-090-100.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-120-130.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-080-090.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-350-360.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-250-260.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-280-290.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-210-220.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/tmass-150-160.hdf5',
 '/home2/s20321005/Thesis-Project/Data/TWOMASS/t

In [6]:
vaex.open_many(files)

#,ra,dec,Jmag,Hmag,Kmag
0,70.000147,-73.327232,10.583,9.985,9.717
1,70.000158,-73.298195,12.086,11.151,10.797
2,70.000188,-79.923325,13.487,13.145,13.07
3,70.000465,-89.049103,12.022,11.554,11.451
4,70.000511,-76.362122,11.954,11.548,11.413
...,...,...,...,...,...
37995329,109.959101,71.371353,12.514,12.192,12.19
37995330,109.96032,71.409782,11.604,11.099,11.043
37995331,109.967031,70.026169,12.273,11.984,11.952
37995332,109.973617,77.130386,13.272,12.965,12.945
