SELECT TOP 100 
m.rv_galah, m.e_rv_galah,
m.logg AS logg_galah, m.e_logg AS e_logg_galah,
m.teff AS teff_galah, m.e_teff AS e_teff_galah,
m.fe_h AS feh_galah, m.e_fe_h AS e_feh_galah, 
m.alpha_fe AS alphafe_galah, m.e_alpha_fe AS e_alphafe_galah
FROM galah_dr3.main_star as m

Go to https://datacentral.org.au/services/query/
![galah query page](<images/galah-query-page.png>)

The query
```
SELECT TOP 100000 galah_main.rv_galah,
galah_main.fe_h AS feh_galah, gaia_vac.dr3_source_id AS source_id, 
galah_main.alpha_fe AS alphafe_galah, galah_main.teff AS teff_galah,
galah_main.e_teff AS e_teff_galah, galah_main.logg AS logg_galah,
galah_main.e_logg AS e_logg_galah
FROM galah_dr3p2.main_star as galah_main
INNER JOIN galah_dr3p2.vac_gaia_edr3 as gaia_vac on galah_main.dr3_source_id = gaia_vac.dr3_source_id
WHERE galah_main.flag_sp = 0 AND galah_main.flag_fe_h = 0
AND galah_main.flag_alpha_fe = 0
AND galah_main.ra_dr2 BETWEEN 60 AND 120
```
Divide RA in smaller chunk if necessary

save the downloaded files to `Thesis-Project/Data/GALAH`

In [None]:
# Vizier tap endpoint
tap = Tap(url="http://tapvizier.u-strasbg.fr/TAPVizieR/tap/sync/")

columns_main = ["HRV AS rv_rave", "e_HRV AS e_rv_rave"]
columns_aux = ["Teffs AS teff_rave", "loggs AS logg_rave", 
               "[M/H]s AS mh_rave", "[a/Fe]s AS alphafe_rave"]
columns_xmatch = ["Gaiae3 AS source_id"]

columns_main = list(map(lambda x: appendName(x, "main"), columns_main))
columns_aux = list(map(lambda x: appendName(x, "aux"), columns_aux))
columns_xmatch = list(map(lambda x: appendName(x, "xmatch"), columns_xmatch))

columns = columns_xmatch + columns_aux + columns_main
columns

# Cleaning

Data downloaded from Website have `byte` data type, which is annoying.

convert it to `float64`, follow the following instruction

In [8]:
from astroquery.utils.tap.core import Tap
from os.path import join, abspath
from os import pardir, curdir, mkdir
from glob import glob
from astropy.table import Table
import vaex

In [5]:
name = "GALAH"
root_data_dir = abspath(join(pardir, "Data"))
data_dir = join(root_data_dir, name)
try:
  mkdir(data_dir)
  print(f"Creating {name} dir in Data dir")
except FileExistsError:
  print("Directory already exist. Good to go!")
data_dir

Directory already exist. Good to go!


'/home2/s20321005/Thesis-Project/Data/GALAH'

In [7]:
# load downloaded data in this directory with .fits format
files = glob(join(data_dir, "*.fits"))
files.sort()
files

['/home2/s20321005/Thesis-Project/Data/GALAH/0-180.fits',
 '/home2/s20321005/Thesis-Project/Data/GALAH/180-300.fits',
 '/home2/s20321005/Thesis-Project/Data/GALAH/300-360.fits']

In [9]:
# convert the data types
df_com = []
for file in files:
    table = Table.read(file)
    for col in table.colnames:
        table[col] = table[col].astype("float64")
    df = vaex.from_astropy_table(table)
    if len(df_com) == 0:
        df_com = df
    else:
        df_com = df_com.concat(df)
df_com

#,rv_galah,e_rv_galah,feh_galah,source_id,alphafe_galah,teff_galah,e_teff_galah,logg_galah,e_logg_galah
0,13.437999725341797,0.05400000140070915,0.03595352,4.641115615375957e+18,0.09246221004384556,4506.731,82.13812325830732,2.7686563,0.1915454424534654
1,9.869000434875488,0.12600000202655792,0.18803501,4.641111702662211e+18,-0.10428717418309698,5663.523,128.3193470261073,3.8865454,0.19110837782879747
2,-9.939000129699707,0.1469999998807907,-0.38892412,4.629114377239289e+18,0.10956293667349712,6137.2373,111.88808100892342,4.1912,0.18346672370192932
3,-9.939000129699707,0.1469999998807907,-0.38892412,4.629114377239289e+18,0.10956293667349712,6137.2373,111.88808100892342,4.1912,0.18346672370192932
4,-24.608999252319336,0.09000000357627869,-0.32791662,4.629109326357751e+18,0.055064224666230004,5661.511,86.80019357157002,4.3855586,0.18051787846182552
...,...,...,...,...,...,...,...,...,...
470506,14.376999855041504,0.12200000137090683,0.1387043,6.837094224446874e+18,-0.033266165556283395,5424.1885,123.60539815993849,3.917587,0.1965486258996146
470507,-5.7870001792907715,0.1770000010728836,0.011001587,6.837087833535546e+18,0.026654315913336002,6452.1133,121.06105587151487,4.1991615,0.20145014972469574
470508,-36.01300048828125,0.09700000286102295,0.48132372,6.837109858127866e+18,0.039617528753247075,5668.3193,128.53344891021374,4.189171,0.19843652758638192
470509,14.550000190734863,0.08799999952316284,-0.121267796,6.837118615565941e+18,0.05056864915165997,4467.411,109.93485597196742,4.61917,0.19285309555419056


In [10]:
# remove duplicates, if any
df_pandas = df_com.to_pandas_df()
df_pandas.drop_duplicates("source_id", inplace=True)
df = vaex.from_pandas(df_pandas)
df

#,rv_galah,e_rv_galah,feh_galah,source_id,alphafe_galah,teff_galah,e_teff_galah,logg_galah,e_logg_galah
0,13.437999725341797,0.05400000140070915,0.03595352,4.641115615375957e+18,0.09246221004384556,4506.731,82.13812325830732,2.7686563,0.1915454424534654
1,9.869000434875488,0.12600000202655792,0.18803501,4.641111702662211e+18,-0.10428717418309698,5663.523,128.3193470261073,3.8865454,0.19110837782879747
2,-9.939000129699707,0.1469999998807907,-0.38892412,4.629114377239289e+18,0.10956293667349712,6137.2373,111.88808100892342,4.1912,0.18346672370192932
3,-24.608999252319336,0.09000000357627869,-0.32791662,4.629109326357751e+18,0.055064224666230004,5661.511,86.80019357157002,4.3855586,0.18051787846182552
4,47.25600051879883,0.10999999940395355,-0.20193052,4.629121219123632e+18,-0.0007175464608104134,5579.666,84.7359424959418,4.1815453,0.17985555918706975
...,...,...,...,...,...,...,...,...,...
415421,14.376999855041504,0.12200000137090683,0.1387043,6.837094224446874e+18,-0.033266165556283395,5424.1885,123.60539815993849,3.917587,0.1965486258996146
415422,-5.7870001792907715,0.1770000010728836,0.011001587,6.837087833535546e+18,0.026654315913336002,6452.1133,121.06105587151487,4.1991615,0.20145014972469574
415423,-36.01300048828125,0.09700000286102295,0.48132372,6.837109858127866e+18,0.039617528753247075,5668.3193,128.53344891021374,4.189171,0.19843652758638192
415424,14.550000190734863,0.08799999952316284,-0.121267796,6.837118615565941e+18,0.05056864915165997,4467.411,109.93485597196742,4.61917,0.19285309555419056


In [11]:
# export to hdf5
df.export(join(data_dir, "galah.hdf5"), progress=True)

export(hdf5) [########################################] 100.00% elapsed time  :     0.07s =  0.0m =  0.0h
 