There are two ways to download RAVE:
1. From website
2. Tap Vizier

Choose only one of these two methods.

# 1. From website

Query RAVE DR6 from the RAVE website: https://www.rave-survey.org/query/

![rave query page](<images/rave-query-page.png>)

#### The query

If needed, split the query into several right-ascension (RA) ranges so that each response stays small.

```
SELECT TOP 150000 sparv."hrv_sparv" AS rv_rave, sparv."hrv_error_sparv" AS e_rv_rave, aux."teff_sparv" AS teff_rave, aux."logg_sparv" AS logg_rave, aux."m_h_sparv" AS mh_rave, aux."alpha_sparv" AS alphafe_rave, gaia."source_id" 
FROM ravedr6.dr6_sparv AS sparv 
JOIN ravedr6.dr6_sparv_aux AS aux ON sparv.rave_obs_id = aux.rave_obs_id 
RIGHT JOIN ravedr6.dr6_x_gaiaedr3 AS gaia ON gaia.rave_obs_id = sparv.rave_obs_id 
WHERE gaia.ra BETWEEN 0 AND 60
```

Change `WHERE gaia.ra BETWEEN 0 AND 60` to another RA interval for each subsequent query.

# 2. Tap Vizier

In [1]:
from astroquery.utils.tap.core import Tap
from os.path import join, abspath
from os import pardir, curdir, mkdir
from glob import glob
import sys
import numpy as np
import pandas as pd
import vaex

In [2]:
# import utils
# Put the parent directory at the front of sys.path before importing `utils`
# (assumes the project's utils module lives in the parent directory).
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

from utils import timeout, progressbar, appendName

In [28]:
# Resolve the folder that holds the RAVE DR6 downloads and create it if needed.
from os import makedirs  # imported here because the top-level imports only bring in mkdir

name = "RAVE6"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
try:
    # makedirs also creates the parent "Data" directory when it is missing;
    # plain mkdir would raise FileNotFoundError in that case.
    makedirs(data_dir)
    print(f"Creating {name} dir in Data dir")
except FileExistsError:
    # The target directory is already there, so nothing has to be created.
    print("Directory already exist. Good to go!")
data_dir

Directory already exist. Good to go!


'/home2/s20321005/Thesis-Project/Data/RAVE6'

In [4]:
# Vizier tap endpoint
tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")

# Every entry reads "<catalogue column> AS <output name>"; appendName is given
# the entry together with the alias of the table it is selected from.
columns_main = [
    appendName(col, "main")
    for col in ("HRV AS rv_rave", "e_HRV AS e_rv_rave")
]
columns_aux = [
    appendName(col, "aux")
    for col in (
        "Teffs AS teff_rave",
        "loggs AS logg_rave",
        "[M/H]s AS mh_rave",
        "[a/Fe]s AS alphafe_rave",
    )
]
columns_xmatch = [appendName(col, "xmatch") for col in ("Gaiae3 AS source_id",)]

# Final SELECT order: cross-match id first, then aux columns, then main columns.
columns = [*columns_xmatch, *columns_aux, *columns_main]
columns

['xmatch."Gaiae3" AS source_id',
 'aux."Teffs" AS teff_rave',
 'aux."loggs" AS logg_rave',
 'aux."[M/H]s" AS mh_rave',
 'aux."[a/Fe]s" AS alphafe_rave',
 'main."HRV" AS rv_rave',
 'main."e_HRV" AS e_rv_rave']

In [34]:
# RA partition edges: 10 evenly spaced integer boundaries from 0 to 360,
# i.e. 9 consecutive intervals to query one at a time.
ras = np.linspace(0, 360, num=10, dtype=int)
ras

array([  0,  40,  80, 120, 160, 200, 240, 280, 320, 360])

In [25]:
# Small trial query (10 rows, first RA partition only) to check that the
# endpoint and the three-table join respond before the full download.
TOP = 10
ra0, ra1 = ras[0], ras[1]
query = f"""
SELECT TOP {TOP} HRV 
FROM "III/283/ravedr6" AS main
JOIN "III/283/aux" AS aux ON main.ObsID = aux.ObsID
RIGHT JOIN "III/283/xgaiae3" AS xmatch ON main.ObsID = xmatch.ObsID
WHERE RAJ2000 BETWEEN {ra0} AND {ra1}
"""
cek = timeout(
    tap.launch_job,
    args=(query,),
    timeout_duration=120,
    minVal=0.2,
)

In [40]:
# Download every RA partition from VizieR and save each one as its own FITS file.
TOP = 100_000     # row cap per query; a partition reaching it would be truncated
MAX_RETRIES = 10  # consecutive failed attempts tolerated for a single partition
i = 0
retries = 0
while i < len(ras) - 1:
    ra0 = ras[i]
    ra1 = ras[i+1]
    query = f"""
    SELECT TOP {TOP} {", ".join(columns)} 
    FROM "III/283/ravedr6" AS main
    JOIN "III/283/aux" AS aux ON main.ObsID = aux.ObsID
    RIGHT JOIN "III/283/xgaiae3" AS xmatch ON main.ObsID = xmatch.ObsID
    WHERE RAJ2000 BETWEEN {ra0} AND {ra1}
    """
    # NOTE(review): timeout() presumably returns None when the call does not
    # finish in time -- confirm against utils.timeout.
    job = timeout(tap.launch_job, args=(query,), timeout_duration=120, minVal=0.1)
    if job is None:
        # Retry the same partition (i is not advanced), but give up after
        # MAX_RETRIES consecutive failures instead of looping forever.
        print("fail to fetch")
        retries += 1
        if retries >= MAX_RETRIES:
            raise RuntimeError(
                f"giving up on RA {ra0}-{ra1} after {MAX_RETRIES} failed attempts"
            )
        continue
    retries = 0
    result = job.get_results()
    if len(result) >= TOP:
        # The query returned as many rows as the cap allows, so the partition
        # may have been cut short; use narrower RA intervals or a larger TOP.
        print(f"WARNING: rave-{ra0:03d}-{ra1:03d} reached TOP={TOP}; rows may be missing")
    result.write(join(data_dir, f"rave-{ra0:03d}-{ra1:03d}.fits"), overwrite=True)
    print(f"{i} saved rave-{ra0:03d}-{ra1:03d}.fits | {len(result)}")
    i += 1

0 saved rave-000-040.fits | 49685
1 saved rave-040-080.fits | 49320
2 saved rave-080-120.fits | 51684
3 saved rave-120-160.fits | 60302
4 saved rave-160-200.fits | 68358
5 saved rave-200-240.fits | 73444
6 saved rave-240-280.fits | 34980
7 saved rave-280-320.fits | 74489
8 saved rave-320-360.fits | 55095


# Cleaning

In [41]:
# workaround weird error from vaex
# combine all files

import vaex
from glob import glob
from astropy.table import Table

# load all fits files that were downloaded
files = sorted(glob(join(data_dir, "*.fits")))
if not files:
    # Fail early with a clear message instead of an AttributeError further down.
    raise FileNotFoundError(f"no .fits files found in {data_dir}")

# combine in loop
df_com = None  # None means "nothing accumulated yet"
for file in files:
    # Each file is read with astropy and passed through pandas into vaex
    # (the workaround mentioned at the top of this cell).
    table = Table.read(file)
    df_pandas = table.to_pandas()
    df = vaex.from_pandas(df_pandas)
    df_com = df if df_com is None else df_com.concat(df)
# mask large error: keep only rows with e_rv_rave below 200
mask = (df_com.e_rv_rave < 200)
df_masked = df_com[mask]

In [42]:
# display the table that remains after the e_rv_rave mask
df_masked

#,source_id,teff_rave,logg_rave,mh_rave,alphafe_rave,rv_rave,e_rv_rave
0,4972049927935844480,6005,3.82,0.01,0.0,20.92,1.36
1,4900290576426088960,6452,4.86,-0.03,0.11,8.88,2.72
2,4900221307193513216,5546,4.35,-0.01,0.23,52.03,2.99
3,4919128440425220992,6526,4.75,0.16,0.0,22.99,2.25
4,2312704477167095040,4345,1.36,-0.91,0.16,-24.16,0.74
...,...,...,...,...,...,...,...
517279,2306375898691934208,4551,2.28,-0.35,0.0,3.74,0.86
517280,6386817309198311424,4642,1.92,-0.62,0.0,-26.17,0.9
517281,6386817309198311424,4571,2.03,-0.5,0.07,-22.35,2.63
517282,6386817309198311424,4557,1.96,-0.62,0.07,-25.11,1.35


In [43]:
# Collapse rows that share a source_id; pandas keeps the first occurrence
# of each id by default. The result is converted back to a vaex DataFrame.
df_pandas = df_masked.to_pandas_df().drop_duplicates(subset="source_id")
df = vaex.from_pandas(df_pandas)
df

#,source_id,teff_rave,logg_rave,mh_rave,alphafe_rave,rv_rave,e_rv_rave
0,4972049927935844480,6005,3.82,0.01,0.0,20.92,1.36
1,4900290576426088960,6452,4.86,-0.03,0.11,8.88,2.72
2,4900221307193513216,5546,4.35,-0.01,0.23,52.03,2.99
3,4919128440425220992,6526,4.75,0.16,0.0,22.99,2.25
4,2312704477167095040,4345,1.36,-0.91,0.16,-24.16,0.74
...,...,...,...,...,...,...,...
450925,6485889525973362688,6435,4.1,-0.18,0.0,16.48,1.16
450926,2334561325218787584,4481,2.88,0.49,0.0,-54.43,1.59
450927,2306375898691934208,4551,2.28,-0.35,0.0,3.74,0.86
450928,6386817309198311424,4642,1.92,-0.62,0.0,-26.17,0.9


In [44]:
# write the de-duplicated table to rave6.hdf5 inside the RAVE6 data directory
hdf5_path = join(data_dir, "rave6.hdf5")
df.export(hdf5_path, progress=True)

export(hdf5) [########################################] 100.00% elapsed time  :     0.06s =  0.0m =  0.0h
 