In [1]:
from os.path import join, abspath
from os import pardir, mkdir
from glob import glob
import sys
import numpy as np
import pandas as pd
import vaex

In [2]:
# Make the project's `utils` module importable: the parent of the current
# working directory is put at the front of sys.path, so the import below
# resolves against it first. The path insertion must run before the import.
util_dir = abspath(pardir)
sys.path.insert(0, util_dir)

from utils import runcmd

In [3]:
# Absolute path of the "Data" folder that lives in the parent of the current
# working directory; echoed as the cell output for a quick sanity check.
parent_dir = abspath(pardir)
root_data_dir = join(parent_dir, "Data")
root_data_dir

'/home2/s20321005/Thesis-Project/Data'

In [4]:
# Reference table used for spectral type classification, fetched with wget:
# https://www.pas.rochester.edu/~emamajek/EEM_dwarf_UBVIJHK_colors_Teff.txt
download_link = "https://www.pas.rochester.edu/~emamajek/EEM_dwarf_UBVIJHK_colors_Teff.txt"

# --timestamping makes the cell safe to re-run: wget refreshes the existing
# file in place (only when the remote copy is newer) instead of saving a
# duplicate with a ".1" suffix that the later cells would never read.
runcmd(f"wget --timestamping --directory-prefix={root_data_dir} {download_link}", verbose = True)

 --2022-07-17 13:23:53--  https://www.pas.rochester.edu/~emamajek/EEM_dwarf_UBVIJHK_colors_Teff.txt
Resolving www.pas.rochester.edu (www.pas.rochester.edu)... 128.151.167.201
Connecting to www.pas.rochester.edu (www.pas.rochester.edu)|128.151.167.201|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 55680 (54K) [text/plain]
Saving to: ‘/home2/s20321005/Thesis-Project/Data/EEM_dwarf_UBVIJHK_colors_Teff.txt’

     0K .......... .......... .......... .......... .......... 91% 93.9K 0s
    50K ....                                                  100% 8345G=0.5s

2022-07-17 13:23:55 (102 KB/s) - ‘/home2/s20321005/Thesis-Project/Data/EEM_dwarf_UBVIJHK_colors_Teff.txt’ saved [55680/55680]




In [5]:
# Location of the downloaded table inside the data directory; echoed as the
# cell output.
table_filename = "EEM_dwarf_UBVIJHK_colors_Teff.txt"
file_path = join(root_data_dir, table_filename)
file_path

'/home2/s20321005/Thesis-Project/Data/EEM_dwarf_UBVIJHK_colors_Teff.txt'

In [6]:
# Convert the whitespace-separated text table into a vaex DataFrame.
#
# The first column (SpT) is kept as text; every other column is parsed as a
# float, and any cell that is missing or not a valid number becomes NaN.
cols = ['SpT', 'Teff', 'logT', 'BCv', 'logL', 'Mbol', 'R_Rsun', 'Mv', 'B-V', 'Bt-Vt', 'G-V', 
        'Bp-Rp', 'G-Rp', 'M_G', 'b-y', 'U-B', 'V-Rc', 'V-Ic', 'V-Ks', 'J-H', 'H-Ks', 'M_J', 
        'M_Ks', 'Ks-W1', 'W1-W2', 'W1-W3', 'W1-W4', 'g-r', 'i-z', 'z-Y', 'Msun']

# Zero-based, inclusive line window of the file that holds the data rows.
# Everything outside it is skipped (presumably header and footnote text --
# re-check these bounds if the upstream file changes).
FIRST_DATA_LINE = 23
LAST_DATA_LINE = 140

# Collect each column in a plain list and build the DataFrame once at the end.
# Concatenating one single-row frame per line is quadratic in the number of
# rows and leaves a heavily chunked DataFrame behind.
col_dict = {col: [] for col in cols}
with open(file_path) as fp:
    for count, line in enumerate(fp):
        if count < FIRST_DATA_LINE:
            continue
        if count > LAST_DATA_LINE:
            break
        # The last token of every row is discarded (presumably a repeated
        # spectral-type label -- verify against the file).
        row = line.split()[:-1]
        if not row:
            # Blank or single-token line: there is no label to store.
            continue
        for i, col in enumerate(cols):
            if i == 0:
                col_dict[col].append(row[0])
                continue
            try:
                value = float(row[i])
            except (IndexError, ValueError):
                # IndexError: the row is shorter than the column list.
                # ValueError: the cell is a non-numeric placeholder.
                value = np.nan
            col_dict[col].append(value)

df_SpT = vaex.from_arrays(**col_dict)
df_SpT

#,SpT,Teff,logT,BCv,logL,Mbol,R_Rsun,Mv,B-V,Bt-Vt,G-V,Bp-Rp,G-Rp,M_G,b-y,U-B,V-Rc,V-Ic,V-Ks,J-H,H-Ks,M_J,M_Ks,Ks-W1,W1-W2,W1-W3,W1-W4,g-r,i-z,z-Y,Msun
0,O3V,44900.0,4.652,-4.01,5.82,-9.81,13.43,-5.8,-0.33,,,,,,,-1.175,,,,,,,,,,,,,,,59.0
1,O4V,42900.0,4.632,-3.89,5.65,-9.39,12.13,-5.5,-0.326,,,,,,,-1.16,,,,,,,,,,,,,,,48.0
2,O5V,41400.0,4.617,-3.76,5.54,-9.11,11.45,-5.35,-0.323,,,,,,-0.133,-1.15,,,,,,,,,,,,-0.62,,,43.0
3,O5.5V,40500.0,4.607,-3.67,5.44,-8.87,10.71,-5.2,-0.322,,,,,,-0.133,-1.145,,,,,,,,,,,,-0.62,,,38.0
4,O6V,39500.0,4.597,-3.57,5.36,-8.67,10.27,-5.1,-0.321,,,,,,-0.132,-1.14,,,,,,,,,,,,-0.62,,,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,Y0.5V,400.0,2.602,,,,,,,,,,,,,,,,,,-0.6,22.0,21.6,,3.9,,,,,,
114,Y1V,360.0,2.556,,,,,,,,,,,,,,,,,,,22.7,22.2,,4.1,,,,,,
115,Y1.5V,325.0,2.512,,,,,,,,,,,,,,,,,,-0.8,23.2,22.9,,4.4,,,,,,
116,Y2V,320.0,2.505,,,,,,,,,,,,,,,,,,,23.6,23.5,,4.7,,,,,,


In [7]:
# Write the spectral-type table into the data directory as an HDF5 file,
# with the export progress bar enabled.
hdf5_path = join(root_data_dir, "mamajek-spectral-class.hdf5")
df_SpT.export(hdf5_path, progress=True)

export(hdf5) [########################################] 100.00% elapsed time  :     0.75s =  0.0m =  0.0h
 