There are two ways to download RAVE:
1. From website
2. TAP (RAVE VO)

Choose only one of these two methods.

# 1. From website

Query RAVE DR6 through the survey's web query interface: https://www.rave-survey.org/query/

![rave query page](<images/rave-query-page.png>)

#### The query

If needed, split the query into several right-ascension (RA) intervals so that each response stays small.

```
SELECT TOP 150000 sparv."hrv_sparv" AS rv_rave, sparv."hrv_error_sparv" AS e_rv_rave, aux."teff_sparv" AS teff_rave, aux."logg_sparv" AS logg_rave, aux."m_h_sparv" AS mh_rave, aux."alpha_sparv" AS alphafe_rave, gaia."source_id" 
FROM ravedr6.dr6_sparv AS sparv 
JOIN ravedr6.dr6_sparv_aux AS aux ON sparv.rave_obs_id = aux.rave_obs_id 
RIGHT JOIN ravedr6.dr6_x_gaiaedr3 AS gaia ON gaia.rave_obs_id = sparv.rave_obs_id 
WHERE gaia.ra BETWEEN 0 AND 60
```

Change the interval in `WHERE gaia.ra BETWEEN 0 AND 60` to cover the other RA ranges.

Save the downloaded files to `Thesis-Project/Data/RAVE6`.

# 2. TAP RAVE VO

In [16]:
from astroquery.utils.tap.core import Tap
from os.path import join, abspath
from os import pardir, curdir, mkdir
from glob import glob
import sys
import numpy as np
import pandas as pd
import vaex
from astropy.table import Table



In [2]:
# Make the project's local `utils` module importable: the parent of the
# current working directory is placed first on the module search path.
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

# Helpers used further down when building and launching the TAP query.
from utils import timeout
from utils import appendName

In [3]:
# Resolve (and create if necessary) the directory that receives the RAVE files:
# <parent of the working directory>/Data/RAVE6
from os import makedirs

name = "RAVE6"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
try:
    # makedirs also creates a missing parent "Data" directory, where a plain
    # mkdir would fail with FileNotFoundError. It still raises FileExistsError
    # when the target is already there, so both messages below are unchanged.
    makedirs(data_dir)
    print(f"Creating {name} dir in Data dir")
except FileExistsError:
    print("Directory already exist. Good to go!")
data_dir

Directory already exist. Good to go!


'/home2/s20321005/Thesis-Project/Data/RAVE6'

In [6]:
# Vizier tap endpoint
# # tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")
# tap = Tap(url="https://www.rave-survey.org/tap/")
# # tables = tap.load_table('')
# # for table in tables:
# #   print(table.get_qualified_name())
# columns_main = ["rv_rave", "e_HRV AS e_rv_rave"]
# # columns_aux = ["Teffs AS teff_rave", "loggs AS logg_rave", 
# #                "[M/H]s AS mh_rave", "[a/Fe]s AS alphafe_rave"]
# # columns_xmatch = ["Gaiae3 AS source_id"]

# columns_main = list(map(lambda x: appendName(x, "main"), columns_main))
# # columns_aux = list(map(lambda x: appendName(x, "aux"), columns_aux))
# # columns_xmatch = list(map(lambda x: appendName(x, "xmatch"), columns_xmatch))

# # columns = columns_xmatch + columns_aux + columns_main
# columns

In [4]:
# TAP service of the RAVE survey. The alternative endpoint below is kept for
# reference only and is not used:
# tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")
tap = Tap(url="https://www.rave-survey.org/tap/")

# Fully qualified names of the three RAVE DR6 tables queried here.
sparv_table = "ravedr6.dr6_sparv"
aux_table = "ravedr6.dr6_sparv_aux"
xmatch_table = "ravedr6.dr6_x_gaiaedr3"

# Columns to select from each table ("<column> AS <alias>"), passed through
# the project helper `appendName` together with the owning table's name.
columns_sparv = [
    appendName(col, sparv_table)
    for col in ["hrv_sparv AS rv_rave", "hrv_error_sparv AS e_rv_rave"]
]
columns_aux = [
    appendName(col, aux_table)
    for col in [
        "teff_sparv AS teff_rave",
        "logg_sparv AS logg_rave",
        "m_h_sparv AS mh_rave",
        "alpha_sparv AS alphafe_rave",
    ]
]
columns_xmatch = [appendName(col, xmatch_table) for col in ["source_id"]]

# Final SELECT list, in the order: sparv, aux, cross-match.
columns = columns_sparv + columns_aux + columns_xmatch
columns

['ravedr6.dr6_sparv."hrv_sparv" AS rv_rave',
 'ravedr6.dr6_sparv."hrv_error_sparv" AS e_rv_rave',
 'ravedr6.dr6_sparv_aux."teff_sparv" AS teff_rave',
 'ravedr6.dr6_sparv_aux."logg_sparv" AS logg_rave',
 'ravedr6.dr6_sparv_aux."m_h_sparv" AS mh_rave',
 'ravedr6.dr6_sparv_aux."alpha_sparv" AS alphafe_rave',
 'ravedr6.dr6_x_gaiaedr3."source_id"']

In [5]:
# Right-ascension partition edges: 10 evenly spaced integer values from 0 to
# 360 inclusive, i.e. 9 consecutive intervals.
ras = np.linspace(start=0, stop=360, num=10).astype(int)
ras

array([  0,  40,  80, 120, 160, 200, 240, 280, 320, 360])

In [6]:
", ".join(columns)

'ravedr6.dr6_sparv."hrv_sparv" AS rv_rave, ravedr6.dr6_sparv."hrv_error_sparv" AS e_rv_rave, ravedr6.dr6_sparv_aux."teff_sparv" AS teff_rave, ravedr6.dr6_sparv_aux."logg_sparv" AS logg_rave, ravedr6.dr6_sparv_aux."m_h_sparv" AS mh_rave, ravedr6.dr6_sparv_aux."alpha_sparv" AS alphafe_rave, ravedr6.dr6_x_gaiaedr3."source_id"'

In [15]:
# Download the selected columns one right-ascension interval at a time and
# write each interval to its own HDF5 file in `data_dir`.
for ra_lower, ra_upper in zip(ras[:-1], ras[1:]):
    # Row limit for this interval; doubled and retried whenever it is reached.
    TOP = 100000
    while True:
        print("querying", ra_lower, ra_upper)
        query = f"""
        SELECT TOP {TOP} {", ".join(columns)}
        FROM {sparv_table}
        JOIN {aux_table} ON {sparv_table}.rave_obs_id = {aux_table}.rave_obs_id
        JOIN {xmatch_table} ON {sparv_table}.rave_obs_id = {xmatch_table}.rave_obs_id
        WHERE {xmatch_table}.ra BETWEEN {ra_lower} AND {ra_upper}
        """
        # NOTE(review): `timeout` is a project helper from utils; what it
        # returns when the 120 s limit is exceeded is not visible here, so
        # confirm that `job` is always a usable job object.
        job = timeout(tap.launch_job_async, args=(query,), timeout_duration=120, minVal=0.2)
        result = job.get_results()
        if len(result) == TOP:
            # Exactly TOP rows came back, so the response was probably cut off
            # by the limit: report the limit that was hit, then retry with
            # twice as many rows.
            print("capped at", TOP)
            TOP *= 2
            continue
        # Fewer rows than the limit: the interval is complete, save it.
        df = result.to_pandas()
        df = vaex.from_pandas(df)
        df.export_hdf5(join(data_dir, f"rave-{ra_lower:03d}-{ra_upper:03d}.hdf5"))
        break

querying 0 40
INFO: Query finished. [astroquery.utils.tap.core]
querying 40 80
INFO: Query finished. [astroquery.utils.tap.core]
querying 80 120
INFO: Query finished. [astroquery.utils.tap.core]
querying 120 160
INFO: Query finished. [astroquery.utils.tap.core]
querying 160 200
INFO: Query finished. [astroquery.utils.tap.core]
querying 200 240
INFO: Query finished. [astroquery.utils.tap.core]
querying 240 280
INFO: Query finished. [astroquery.utils.tap.core]
querying 280 320
INFO: Query finished. [astroquery.utils.tap.core]
querying 320 360
INFO: Query finished. [astroquery.utils.tap.core]


# Cleaning

In [17]:
# Collect the per-interval partition files ("rave-<lower>-<upper>.hdf5") in
# sorted order. The pattern is restricted to the "rave-" prefix so that the
# merged "rave6.hdf5" written to the same directory at the end of this
# notebook is not picked up again when the notebook is re-run.
files = sorted(glob(join(data_dir, "rave-*.hdf5")))
files

['/home2/s20321005/Thesis-Project/Data/RAVE6/rave-000-040.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-040-080.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-080-120.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-120-160.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-160-200.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-200-240.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-240-280.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-280-320.hdf5',
 '/home2/s20321005/Thesis-Project/Data/RAVE6/rave-320-360.hdf5']

In [18]:
# Open all files listed in `files` with vaex and bind the result to `rave`
# (displayed below as a single table).
rave = vaex.open_many(files)
rave

#,rv_rave,e_rv_rave,teff_rave,logg_rave,mh_rave,alphafe_rave,source_id
0,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
1,34.994,2.406,4969.0,3.5,-0.15,0.14,4976832150682472320
2,-32.216,3.533,3709.0,2.59,-0.17,0.4,4976765595869168512
3,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
4,9.119,4.21,5244.0,4.08,0.07,0.01,4973522036567243264
...,...,...,...,...,...,...,...
517352,-7.549,1.682,6710.0,4.78,-0.27,0.0,6459331406720374912
517353,-22.461,1.746,6196.0,4.96,-0.06,0.02,6459386657179753984
517354,-1.974,1.182,4255.0,1.53,-0.54,0.22,6459807009922689920
517355,-11.966,2.76,6208.0,4.61,0.02,0.0,6459288491407088768


In [20]:
# De-duplicate on the Gaia `source_id` by round-tripping through pandas:
# convert to a pandas DataFrame, drop repeated ids, convert back to vaex.
df = rave.to_pandas_df().drop_duplicates(subset="source_id")
rave = vaex.from_pandas(df)
rave

#,rv_rave,e_rv_rave,teff_rave,logg_rave,mh_rave,alphafe_rave,source_id
0,77.837,1.993,4962.0,3.24,-0.61,0.07,4976851014178872448
1,34.994,2.406,4969.0,3.5,-0.15,0.14,4976832150682472320
2,-32.216,3.533,3709.0,2.59,-0.17,0.4,4976765595869168512
3,-2.564,6.833,3813.0,2.89,0.38,0.38,4976544490953365632
4,9.119,4.21,5244.0,4.08,0.07,0.01,4973522036567243264
...,...,...,...,...,...,...,...
450973,-3.353,2.577,5430.0,4.23,0.22,0.05,6523068137278802048
450974,-0.753,1.277,5908.0,4.32,-0.01,0.11,6523116137833329792
450975,-1.839,2.183,5726.0,3.84,0.01,0.18,6523133145903856768
450976,7.603,1.202,3927.0,4.07,-0.41,0.0,6524623911873631104


In [22]:
# Write the cleaned table to a single HDF5 file inside `data_dir`,
# showing a progress bar while exporting.
rave6_path = join(data_dir, "rave6.hdf5")
rave.export(rave6_path, progress=True)

export(hdf5) [########################################] 100.00% elapsed time  :     0.06s =  0.0m =  0.0h
 