In [1]:
from astroquery.utils.tap.core import Tap
import vaex
import numpy as np
from datetime import datetime
from time import time
from astroquery.gaia import Gaia
from os.path import join, abspath
from os import pardir, mkdir
from glob import glob
import sys

In [2]:
# import utils
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

from utils import timeout, progressbar, appendName

In [3]:
root_data_dir = abspath(join(pardir, "Data"))
root_data_dir

'/home2/s20321005/Thesis-Project/Data'

In [4]:
name = "GUMS"
gaia_data_dir = join(root_data_dir, name)
try:
    mkdir(gaia_data_dir)
    print(f"Creating {gaia_data_dir} dir in Data dir")
except FileExistsError:
    print(f"Directory {gaia_data_dir} already exist. Good to go!")


Creating /home2/s20321005/Thesis-Project/Data/GUMS dir in Data dir


In [9]:
columns = ["ra", "dec", "barycentric_distance", "pmra", "pmdec", "radial_velocity",
              "mag_g", "mag_bp", "mag_rp", "feh", "alphafe", "mass", "population", "logg", "teff", "spectral_type"]


In [6]:
# divide into 360 RAs, depend on preference
ras = np.arange(0,360+0.1, 10).astype(int)
dra = ras[1] - ras[0]
ras

array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360])

In [7]:
decs = np.arange(-90,90+0.1,30).astype(int)
ddec = decs[1] - decs[0]
decs

array([-90, -60, -30,   0,  30,  60,  90])

In [14]:
# ra0 for lower boundry and ra1 for upper boundary
# same with dec0 and dec1
ORI_TOP = 50_000_000
for i, (ra0, ra1) in enumerate(zip(ras[:-1], ras[1:])):
    TOP = ORI_TOP # cap maximum rows for each response, so that the response is not exploding
    df_com = [] #initial table
    df_com_tmass = [] #initial tmass table
    time0 = time()
    progressbar(0, info=f"{ra0}-{ra1}")
    j = 0
    skip = False
    trying = 0
    while j < len(decs) -1:
        if trying > 15:
            print("too many tries, raise error")
            raise Exception("too many tries")
        if ~skip:
            t0 = time()
        dec0 = decs[j]
        dec1 = decs[j+1]
        # query gaia data
        # taking wider ra and dec constrains than 2MASS, because of different epoch
        query_gaia = f"""
        SELECT TOP {TOP} {', '.join(columns)}
        FROM gaiadr3.gaia_universe_model AS gdr3
        WHERE gdr3.ra BETWEEN {ra0-dra*1} AND {ra1+dra*1}
        AND gdr3.dec BETWEEN {dec0-ddec*1} AND {dec1+ddec*1} 
        AND barycentric_distance < 1000
        """
        job_gaia = timeout(Gaia.launch_job, args=(query_gaia,), timeout_duration=600)
        if job_gaia == None: #if failed, try again
            print("fail to fetch gaia")
            print("length = ", len(df_com))
            skip = True
            trying += 1
            continue
        result_gaia = job_gaia.get_results()
        if(len(result_gaia) == TOP):
            print(f"gaia data is capped, increase TOP | {TOP}")
            TOP *= 2
            skip = True
            continue
        df_pandas = result_gaia.to_pandas()
        df_gaia = vaex.from_pandas(df_pandas)
        if(len(df_com) == 0):
            df_com = df_gaia
        else:
            df_com = df_com.concat(df_gaia)
        j += 1
        t1 = time()
        skip = False
        trying = 0
        TOP = np.max([ORI_TOP, int(len(df_gaia) * 2)])
        progressbar((j)/(len(decs)-1)*100, info=f"{ra0}-{ra1} | [{dec0}]-[{dec1}] | {round(t1-t0,2)} s | gaia = {len(df_gaia)}| TOP = {TOP}")
    time1 = time()  
    df_com.export(join(gaia_data_dir, f"gaia-{ra0:03d}-{ra1:03d}.hdf5"), progress=True)
    print(f"{len(df_com)} || {round((time1-time0)/60, 2)}m")
    print(f"{i} saved {ra0}-{ra1} || {datetime.now()}")
    break


[                                                  ] 0% 0-10
[########                                          ] 17% 0-10 | [-90]-[-60] | 94.47 s | gaia = 405377| TOP = 50000000
[################                                  ] 33% 0-10 | [-60]-[-30] | 100.65 s | gaia = 456171| TOP = 50000000
[#########################                         ] 50% 0-10 | [-30]-[0] | 100.29 s | gaia = 469626| TOP = 50000000
[#################################                 ] 67% 0-10 | [0]-[30] | 98.05 s | gaia = 391296| TOP = 50000000
[#########################################         ] 83% 0-10 | [30]-[60] | 96.27 s | gaia = 420500| TOP = 50000000
[##################################################] 100% 0-10 | [60]-[90] | 97.34 s | gaia = 396824| TOP = 50000000
export(hdf5) [########################################] 100.00% elapsed time  :     0.62s =  0.0m =  0.0h
 2539794 || 9.78m
0 saved 0-10 || 2023-01-15 19:31:13.676671


In [15]:
df_com

#,ra,dec,barycentric_distance,pmra,pmdec,radial_velocity,mag_g,mag_bp,mag_rp,feh,alphafe,mass
0,3.328152196787567,-62.85988211490977,745.2998,2.881009,-1.6458237,4.398307,12.527637,12.809348,12.071688,0.024816787,-0.023430375,1.322
1,3.344352452976034,-62.86537108848968,956.99426,-6.421385,-13.833936,40.408703,15.015748,15.290438,14.563423,-0.526268,0.20037375,0.798
2,3.3443526511756163,-62.86537123714647,956.99426,-6.421385,-13.833936,72.01157,17.676666,18.351528,16.852684,-0.526268,0.20037375,0.554
3,3.3443525342160187,-62.86537114937915,956.99426,-6.421385,-13.833936,53.3584,14.925948,15.227538,14.439001,-0.526268,0.20037375,1.352
4,3.377595130219574,-62.947907937316316,859.5647,6.3464227,2.3787494,105.689285,15.545778,15.90097,15.00282,-0.7232686,0.28104624,0.725
...,...,...,...,...,...,...,...,...,...,...,...,...
2539789,14.40905085645599,46.9281546194901,524.09796,2.9324734,-3.1217618,-4.5120153,11.500986,11.797072,11.02801,0.083146974,0.025285967,1.367
2539790,14.40903840122083,46.928144073782164,524.09796,2.9324734,-3.1217616,-11.256626,20.454464,21.84714,19.234615,0.083146974,0.025285967,0.189
2539791,14.409049343582893,46.92815333854822,524.09796,2.9324734,-3.1217618,-5.3312516,11.500701,11.796968,11.027444,0.083146974,0.025285967,1.556
2539792,2.775249879171253,62.999197376087,611.92957,-5.5473104,9.140197,-5.052528,19.225765,20.53301,18.051123,0.019185044,0.021308241,0.379
