There are two way to download RAVE:
1. From website
2. Tap Vizier

choose one of the step only

# 1. From website

Query rave6 from their website: https://www.rave-survey.org/query/ 

![rave query page](<images/rave-query-page.png>)

#### The query

divide into multiple RAs for smaller response, if needed

```
SELECT TOP 150000 sparv."hrv_sparv" AS rv_rave, sparv."hrv_error_sparv" AS e_rv_rave, aux."teff_sparv" AS teff_rave, aux."logg_sparv" AS logg_rave, aux."m_h_sparv" AS mh_rave, aux."alpha_sparv" AS alphafe_rave, gaia."source_id" 
FROM ravedr6.dr6_sparv AS sparv 
JOIN ravedr6.dr6_sparv_aux AS aux ON sparv.rave_obs_id = aux.rave_obs_id 
RIGHT JOIN ravedr6.dr6_x_gaiaedr3 AS gaia ON gaia.rave_obs_id = sparv.rave_obs_id 
WHERE gaia.ra BETWEEN 0 AND 60
```

Change `WHERE gaia.ra BETWEEN 0 AND 60` into other interval

save the downloaded files to `Thesis-Project/Data/RAVE6`

# 2. TAP RAVE VO

In [13]:
from astroquery.utils.tap.core import Tap
from os.path import join, abspath
from os import pardir, curdir, mkdir
from glob import glob
import sys
import numpy as np
import pandas as pd
import vaex
from astropy.table import Table

In [16]:
# import utils
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

from utils import append_name, launch_job, check_df, safe_mkdir

In [17]:
name = "RAVE6"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
safe_mkdir(data_dir)

Directory /home2/s20321005/Thesis-Project/Data/RAVE6 already exist. Good to go!


In [4]:
# Vizier tap endpoint
# # tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")
# tap = Tap(url="https://www.rave-survey.org/tap/")
# # tables = tap.load_table('')
# # for table in tables:
# #   print(table.get_qualified_name())
# columns_main = ["rv_rave", "e_HRV AS e_rv_rave"]
# # columns_aux = ["Teffs AS teff_rave", "loggs AS logg_rave", 
# #                "[M/H]s AS mh_rave", "[a/Fe]s AS alphafe_rave"]
# # columns_xmatch = ["Gaiae3 AS source_id"]

# columns_main = list(map(lambda x: appendName(x, "main"), columns_main))
# # columns_aux = list(map(lambda x: appendName(x, "aux"), columns_aux))
# # columns_xmatch = list(map(lambda x: appendName(x, "xmatch"), columns_xmatch))

# # columns = columns_xmatch + columns_aux + columns_main
# columns

In [5]:
# RAVE tap endpoint
# tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")
tap = Tap(url="https://www.rave-survey.org/tap/")

columns_sparv = ["hrv_sparv AS rv_rave", "hrv_error_sparv AS e_rv_rave"]
columns_aux = ["teff_sparv AS teff_rave", "logg_sparv AS logg_rave", 
               "m_h_sparv AS mh_rave", "alpha_sparv AS alphafe_rave"]
columns_xmatch = ["source_id"]

sparv_table = "ravedr6.dr6_sparv"
aux_table = "ravedr6.dr6_sparv_aux"
xmatch_table = "ravedr6.dr6_x_gaiaedr3"
columns_sparv = list(map(lambda x: append_name(x, sparv_table), columns_sparv))
columns_aux = list(map(lambda x: append_name(x, aux_table), columns_aux))
columns_xmatch = list(map(lambda x: append_name(x, xmatch_table), columns_xmatch))

columns = columns_sparv + columns_aux + columns_xmatch
columns

['ravedr6.dr6_sparv."hrv_sparv" AS rv_rave',
 'ravedr6.dr6_sparv."hrv_error_sparv" AS e_rv_rave',
 'ravedr6.dr6_sparv_aux."teff_sparv" AS teff_rave',
 'ravedr6.dr6_sparv_aux."logg_sparv" AS logg_rave',
 'ravedr6.dr6_sparv_aux."m_h_sparv" AS mh_rave',
 'ravedr6.dr6_sparv_aux."alpha_sparv" AS alphafe_rave',
 'ravedr6.dr6_x_gaiaedr3."source_id"']

# Test Query

In [6]:
TOP = 100_000
query = f"""SELECT TOP {TOP} {", ".join(columns)}
FROM {sparv_table}
JOIN {aux_table} ON {sparv_table}.rave_obs_id = {aux_table}.rave_obs_id
JOIN {xmatch_table} ON {sparv_table}.rave_obs_id = {xmatch_table}.rave_obs_id
WHERE {xmatch_table}.ra BETWEEN {0} AND {10}
"""
df = launch_job(tap.launch_job, query, duration=60)
df

#,hrv_sparv,hrv_error_sparv,teff_sparv,logg_sparv,m_h_sparv,alpha_sparv,source_id
0,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
1,9.119,4.21,5244.0,4.08,0.07,0.01,4973522036567243264
2,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
3,-37.893,4.791,3953.0,2.41,0.11,0.13,4973212867641442560
4,52.252,2.572,4423.0,2.85,-0.39,0.17,4973611303167610496
...,...,...,...,...,...,...,...
13283,-21.983,1.564,6461.0,4.89,-0.27,0.04,4702435241420083712
13284,-18.22,1.364,5734.0,4.34,-0.31,0.21,4702833505145819136
13285,37.739,5.139,4736.0,4.01,0.23,0.08,4690043337353950208
13286,37.162,4.149,4162.0,2.34,-0.34,0.05,4690063983264790784


In [7]:
# divide ra into multiple partitions
ras = np.linspace(0, 360, 37).astype(int)
ras

array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360])

In [8]:
force_break = False
for ra_lower, ra_upper in zip(ras[:-1], ras[1:]):
    TOP = 100000
    while True:
        print("querying", ra_lower, ra_upper)
        query = f"""
        SELECT TOP {TOP} {", ".join(columns)}
        FROM {sparv_table}
        JOIN {aux_table} ON {sparv_table}.rave_obs_id = {aux_table}.rave_obs_id
        JOIN {xmatch_table} ON {sparv_table}.rave_obs_id = {xmatch_table}.rave_obs_id
        WHERE {xmatch_table}.ra BETWEEN {ra_lower} AND {ra_upper}
        """
        df = launch_job(tap.launch_job, query, duration=60)
        check = check_df(df, TOP)
        force_break = check['force_break']
        if force_break: break
        retry = check['retry']
        TOP = check['new_top']
        if retry: 
            print(f"retrying, previous top: {len(df)}, new top: {TOP}")
            continue
        # everything is good
        break
    if force_break: break
    print(len(df), TOP)
    df.export(join(data_dir, f"rave-{ra_lower:03d}-{ra_upper:03d}.hdf5"), progress=True)

querying 0 10
13288 26576
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 10 20
11349 22698
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 20 30
12119 24238
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 30 40
12927 25854
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 40 50
11226 22452
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 50 60
11269 22538
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 60 70
12422 24844
export(hdf5) [########################################] 100.00% elapsed time  :     0.03s =  0.0m =  0.0h
 querying 70 80
14405 28810
export(hdf5) [######################

# Cleaning

In [18]:
files = glob(join(data_dir, "*.hdf5"))
files.sort()
files

['/home2/s20321005/Thesis-Project/Data/RAVE6/rave-000-010.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-010-020.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-020-030.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-030-040.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-040-050.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-050-060.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-060-070.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-070-080.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-080-090.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-090-100.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-100-110.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-110-120.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-120-130.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-130-140.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-140-150.hdf5',
 '/home2/s20321005/Thesis

In [19]:
rave = vaex.open_many(files)
rave

#,hrv_sparv,hrv_error_sparv,teff_sparv,logg_sparv,m_h_sparv,alpha_sparv,source_id
0,-37.893,4.791,3953.0,2.41,0.11,0.13,4973212867641442560
1,4.044,6.539,8286.0,4.91,0.39,0.0,4973250147957768832
2,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
3,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
4,3.519,4.035,4171.0,2.61,-0.14,0.19,4972468120311892096
...,...,...,...,...,...,...,...
968330,-0.753,1.277,5908.0,4.32,-0.01,0.11,6523116137833329792
968331,-1.839,2.183,5726.0,3.84,0.01,0.18,6523133145903856768
968332,7.603,1.202,3927.0,4.07,-0.41,0.0,6524623911873631104
968333,25.576,1.922,5654.0,4.33,0.05,0.19,6522142314129013504


In [20]:
# remove duplicates with pandas
df = rave.to_pandas_df()
df = df.drop_duplicates("source_id")
rave = vaex.from_pandas(df)
rave

#,hrv_sparv,hrv_error_sparv,teff_sparv,logg_sparv,m_h_sparv,alpha_sparv,source_id
0,-37.893,4.791,3953.0,2.41,0.11,0.13,4973212867641442560
1,4.044,6.539,8286.0,4.91,0.39,0.0,4973250147957768832
2,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
3,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
4,3.519,4.035,4171.0,2.61,-0.14,0.19,4972468120311892096
...,...,...,...,...,...,...,...
450973,-0.753,1.277,5908.0,4.32,-0.01,0.11,6523116137833329792
450974,-1.839,2.183,5726.0,3.84,0.01,0.18,6523133145903856768
450975,7.603,1.202,3927.0,4.07,-0.41,0.0,6524623911873631104
450976,25.576,1.922,5654.0,4.33,0.05,0.19,6522142314129013504


In [24]:
columns = rave.column_names
columns

['hrv_sparv',
 'hrv_error_sparv',
 'teff_sparv',
 'logg_sparv',
 'm_h_sparv',
 'alpha_sparv',
 'source_id']

In [25]:
columns_new = ['rv_rave',
 'e_rv_rave',
 'teff_rave',
 'logg_rave',
 'mh_rave',
 'alphafe_rave',
 'source_id']

for old, new in zip(columns, columns_new):
    rave.rename(old, new)
rave

#,rv_rave,e_rv_rave,teff_rave,logg_rave,mh_rave,alphafe_rave,source_id
0,-37.893,4.791,3953.0,2.41,0.11,0.13,4973212867641442560
1,4.044,6.539,8286.0,4.91,0.39,0.0,4973250147957768832
2,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
3,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
4,3.519,4.035,4171.0,2.61,-0.14,0.19,4972468120311892096
...,...,...,...,...,...,...,...
450973,-0.753,1.277,5908.0,4.32,-0.01,0.11,6523116137833329792
450974,-1.839,2.183,5726.0,3.84,0.01,0.18,6523133145903856768
450975,7.603,1.202,3927.0,4.07,-0.41,0.0,6524623911873631104
450976,25.576,1.922,5654.0,4.33,0.05,0.19,6522142314129013504


In [26]:
# export to hdf5
rave.export(join(data_dir, "rave6.hdf5"), progress=True)

export(hdf5) [########################################] 100.00% elapsed time  :     0.08s =  0.0m =  0.0h
 