In [None]:
from astroquery.utils.tap.core import Tap
import vaex
import numpy as np
from datetime import datetime
from time import time
import signal
from glob import glob
from matplotlib import pyplot as plt

In [None]:
# progress bar
def progress(percent=0, width=50):
    """Redraw a one-line text progress bar on stdout.

    Parameters
    ----------
    percent : float
        Completion in percent. The label shows this value rounded to an
        integer; the filled part of the bar is clamped to the range 0-100.
    width : int
        Number of characters drawn between the two brackets.

    Notes
    -----
    The output starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other on the same terminal line.
    """
    # Clamp only the fill: an out-of-range percent must not make the bar
    # longer than `width` (a negative space count would silently become "").
    bounded = min(max(percent, 0), 100)
    filled = int((width * bounded) // 100)
    empty = width - filled

    bar = "#" * filled + " " * empty
    label = f"{percent:.0f}%"

    print("\r[", bar, "]", label, sep="", end="", flush=True)

In [None]:
# Run a call under a time limit, so that a request that takes too long can be abandoned and sent again.
def timeout(func, args=(), kwargs=None, timeout_duration=1, default=None):
    """Call ``func(*args, **kwargs)`` under a time limit and return its result.

    Any failure is reported on stdout and turned into ``default`` so that the
    caller can simply retry.

    Parameters
    ----------
    func : callable
        The function to run.
    args : tuple
        Positional arguments for ``func``.
    kwargs : dict or None
        Keyword arguments for ``func``; ``None`` means no keyword arguments.
    timeout_duration : int or float
        Time limit in seconds. ``0`` disables the limit.
    default : object
        Value returned when the call times out or raises.

    Returns
    -------
    object
        The return value of ``func``, or ``default`` when the call timed out,
        raised ``requests.HTTPError`` or raised any other ``Exception``.

    Notes
    -----
    * The limit is enforced with ``SIGALRM``, so this works only on POSIX
      systems and only when called from the main thread.
    * When ``func`` raises ``HTTPError`` less than one second after the call
      started, the function sleeps for 300 seconds before returning.
    * ``KeyboardInterrupt`` and ``SystemExit`` are not caught; they propagate
      with their original traceback.
    """
    import signal
    from time import sleep, time
    from requests import HTTPError

    # Private exception type, so that a TimeoutError raised by `func` itself is
    # not mistaken for the alarm and the builtin name is not shadowed.
    class _CallTimedOut(Exception):
        pass

    def _on_alarm(signum, frame):
        raise _CallTimedOut()

    def _guarded_call():
        # Arm the timer, run the call, and always disarm before control reaches
        # an except block below. Otherwise the alarm could fire while the
        # HTTPError branch is sleeping and escape as an unhandled exception.
        signal.setitimer(signal.ITIMER_REAL, timeout_duration)
        try:
            return func(*args, **kwargs)
        finally:
            signal.setitimer(signal.ITIMER_REAL, 0)

    if kwargs is None:
        kwargs = {}

    result = default
    t0 = time()
    previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
    try:
        result = _guarded_call()
    except _CallTimedOut:
        result = default
        t1 = time()
        print("too long, requesting again...")
        print(f"time = {round(t1-t0,2)}s")
    except HTTPError:
        result = default
        t1 = time()
        if t1 - t0 < 1:
            # An immediate HTTP error is treated as "service down": back off.
            print("service unavailable, sleep for 300s")
            print(f"time = {round(t1-t0,2)}s")
            sleep(300)
            print("continue")
        else:
            print("server not responding, try again")
            print(f"time = {round(t1-t0,2)}s")
    except Exception:
        # Best effort: every ordinary error becomes `default`.
        # KeyboardInterrupt and SystemExit are BaseException subclasses and
        # therefore pass through untouched.
        result = default
        t1 = time()
        print("some error")
        print(f"time = {round(t1-t0,2)}s")
    finally:
        # Put back whatever handler was installed before this call.
        # signal.signal() returns None for a handler that was not installed
        # from Python; such a handler cannot be re-installed from here.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

    return result

In [None]:
# 2MASS, queried through the TAP endpoint at irsa.ipac.caltech.edu
tap_tmass = Tap(url="https://irsa.ipac.caltech.edu/TAP/sync")

# Catalogue column -> column name used in the exported table. Keeping the pairs
# together means the two lists derived below always line up.
_tmass_column_map = {
    "ra": "ra",
    "dec": "dec",
    "j_m": "Jmag",
    "h_m": "Hmag",
    "k_m": "Kmag",
}
columns_tmass = list(_tmass_column_map.keys())
columns_tmass_names = list(_tmass_column_map.values())

In [None]:
# RA bin edges 0, 10, ..., 360: 37 edges, i.e. 36 strips that are 10 wide.
# Adjust the step to taste.
ras = np.arange(0, 361, 10, dtype=int)
ras

In [None]:
# Declination bin edges: 16 evenly spaced values from -90 to 90 (15 bands).
des = np.linspace(-90.0, 90.0, num=16)
des

In [None]:
TOP = 100_000  # maximum rows per response, so that a single response cannot blow up

# Index of the first RA strip to download; strips with a smaller index are skipped.
# NOTE(review): 24 keeps the original hard-coded `if i <= 23: continue`, presumably a
# leftover from resuming an interrupted run -- set it to 0 to download every strip.
FIRST_STRIP = 24

select_columns = ", ".join(columns_tmass)  # loop-invariant, so built once
n_ra_bins = len(ras) - 1
n_dec_bins = len(des) - 1

# ra0/ra1 are the lower/upper RA edges of a strip; dec0/dec1 are the same for declination.
for i, (ra0, ra1) in enumerate(zip(ras[:-1], ras[1:])):
    if i < FIRST_STRIP:
        continue
    # Bins are half-open, [lower, upper), so a source lying exactly on an edge shared by
    # two bins is fetched once instead of twice (BETWEEN is inclusive at both ends).
    # Only the last bin along each axis keeps its upper edge.
    ra_upper = "<=" if i == n_ra_bins - 1 else "<"
    df_com = None  # table accumulated over all declination bands of this strip
    time0 = time()
    progress(0)
    j = 0
    while j < n_dec_bins:
        dec0 = des[j]
        dec1 = des[j+1]
        dec_upper = "<=" if j == n_dec_bins - 1 else "<"
        query_tmass = f"""
        SELECT TOP {TOP} {select_columns}
        FROM fp_psc
        WHERE ra >= {ra0} AND ra {ra_upper} {ra1}
        AND dec >= {dec0} AND dec {dec_upper} {dec1}
        AND j_m < 11
        """
        job_tmass = timeout(tap_tmass.launch_job, args=(query_tmass,), timeout_duration=180)
        if job_tmass is None:
            # timeout() handed back its default, i.e. the request failed or timed out.
            # j is deliberately not advanced, so the same band is requested again.
            print("fail to fetch tmass")
            print("length = ", 0 if df_com is None else len(df_com))
            continue
        result_tmass = job_tmass.get_results()
        progress((j+1)/n_dec_bins*100)
        df_tmass = result_tmass.to_pandas()
        df_tmass.columns = columns_tmass_names
        if len(df_tmass) >= TOP:
            # The server stops at TOP rows, so a full response most likely means that
            # rows were cut off. Say so instead of saving an incomplete tile silently.
            print(f"\nwarning: ra {ra0}-{ra1}, dec {dec0}-{dec1} returned {len(df_tmass)} rows "
                  f"(TOP = {TOP}); the tile is probably truncated")
        df_tmass = vaex.from_pandas(df_tmass)
        if df_com is None or len(df_com) == 0:
            df_com = df_tmass
        else:
            df_com = df_com.concat(df_tmass)
        j += 1
    time1 = time()
    df_com.export(f"tmass-{ra0:03d}-{ra1:03d}.hdf5", progress=True)
    print(f"{len(df_com)} || {round((time1-time0)/60, 2)}m")
    print(f"{i} saved {ra0}-{ra1} || {datetime.now()}")