# MACHINE LEARNING - APPLICATION OF THE MODEL TO CARMENES DATA

In this notebook we finally apply the trained classifier to the real CARMENES dataset.

## Modules and configuration

### Modules

In [1]:
import pandas as pd

import sys

#from sklearn.experimental import enable_hist_gradient_boosting
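# NOTE: the 'experimental' import above is required on scikit-learn < 1.0 (HistGradientBoosting*
# only became a stable, directly importable estimator in 1.0). The environment was noted as 0.24.x,
# where the enabling import would still be needed - TODO: confirm the installed version.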
from sklearn.ensemble import HistGradientBoostingClassifier

from joblib import dump, load


### Configuration

In [2]:
# CONFIGURATION:
# Every path below is relative to the directory the notebook is run from.

# FILES AND FOLDERS
# Input table read with pd.read_csv in the "Load the CARMENES set" section.
REAL_DATASET = "../data/DATASETS_CESIUM/cesium_GTO_Dataset.csv"
# Output CSV: valid GTO records plus the predicted probability, sorted by probability.
PREDICTION_RESULTS_FILE = "./CARMENES_GTO_prediction_results.csv"
# Output LaTeX file with the table of all stars (columns listed in INTEREST_COLUMNS).
LATEX_TABLE_OUT_FILE =  "./CARMENES_GTO_star_prediction_list.tex"
# Folder where the features-only dataset (REAL_PROCESSED_DS) is written.
OUT_FOLDER = "../data/DATASETS_ML/"
# Folder from which the fitted model is loaded with joblib.
OUT_MODELS_FOLDER = "../data/MODELS_ML/"
# File name (inside OUT_FOLDER) of the features-only dataset of valid records.
REAL_PROCESSED_DS = "GTO_Dataset_Only_Valid_records.csv"

# FEATURES TO KEEP - To remove the Lomb-Scargle (Periodic) 'cesium' features
# The list order is the column order of the matrix handed to predict_proba.
# NOTE(review): presumably this must match the feature set/order used when the model
# was trained - confirm against the training notebook before editing this list.
KEEP_CS_FEATURES = ['all_times_nhist_numpeaks',
                   'all_times_nhist_peak1_bin', 'all_times_nhist_peak2_bin', 'all_times_nhist_peak3_bin', 'all_times_nhist_peak4_bin',
                   'all_times_nhist_peak_1_to_2', 'all_times_nhist_peak_1_to_3', 'all_times_nhist_peak_1_to_4',
                   'all_times_nhist_peak_2_to_3', 'all_times_nhist_peak_2_to_4',
                   'all_times_nhist_peak_3_to_4',
                   'all_times_nhist_peak_val',
                   'avg_double_to_single_step', 'avg_err', 'avgt',
                   'cad_probs_1', 'cad_probs_10', 'cad_probs_20', 'cad_probs_30', 'cad_probs_40', 'cad_probs_50',
                   'cad_probs_100', 'cad_probs_500', 'cad_probs_1000', 'cad_probs_5000',
                   'cad_probs_10000', 'cad_probs_50000', 'cad_probs_100000', 'cad_probs_500000',
                   'cad_probs_1000000', 'cad_probs_5000000', 'cad_probs_10000000',
                   'cads_avg', 'cads_med', 'cads_std', 'mean',
                   'med_double_to_single_step', 'med_err',
                   'n_epochs', 'std_double_to_single_step', 'std_err',
                   'total_time', 'amplitude',
                   'flux_percentile_ratio_mid20', 'flux_percentile_ratio_mid35', 'flux_percentile_ratio_mid50',
                   'flux_percentile_ratio_mid65', 'flux_percentile_ratio_mid80',
                   'max_slope', 'maximum', 'median', 'median_absolute_deviation', 'minimum',
                   'percent_amplitude', 'percent_beyond_1_std', 'percent_close_to_median', 'percent_difference_flux_percentile',
                   'period_fast', 'qso_log_chi2_qsonu', 'qso_log_chi2nuNULL_chi2nu', 'skew', 'std',
                   'stetson_j', 'stetson_k', 'weighted_average', 'fold2P_slope_10percentile', 'fold2P_slope_90percentile']


### Functions

## Load the CARMENES set

In [3]:
# Read the GTO table; separator and decimal mark are spelled out so the expected
# file format is explicit.
gto = pd.read_csv(
    REAL_DATASET,
    sep=',',
    decimal='.',
)
gto.head()

Unnamed: 0,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],L_Lsol,...,freq_signif_ratio_31,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw
0,J23585+076,M3.0 V,3.0,3496.0,20.0,4.97,0.08,-0.06,0.06,0.029308,...,0.720716,0.066449,-36.935032,-3.130905,3.234152,7.193835,0.098248,0.252732,0.063294,0.079189
1,J23556-061,M2.5 V,2.5,3639.0,30.0,4.84,0.14,-0.02,0.08,0.046061,...,0.6648,0.006089,-51.984633,-0.718425,5.582731,1.0,0.143169,0.143169,0.053713,0.038826
2,J23548+385,M4.0 V,4.0,3263.0,16.0,5.13,0.1,-0.55,0.09,0.010424,...,0.948789,0.34326,-2.149188,-0.054085,,1.07728,1.567387,1.073014,1.917366,0.169913
3,J23505-095,M4.0 V,4.0,3377.0,34.0,4.83,0.1,-0.08,0.1,0.010298,...,0.809719,0.498257,-2.800944,9.6e-05,0.793332,1.495594,1.052292,1.496514,0.894181,0.684958
4,J23492+024,M1.0 V,1.0,3573.0,23.0,4.94,0.13,-0.55,0.08,0.025559,...,0.801223,0.598726,-0.009706,0.001959,0.930585,1.783765,0.723985,1.192476,1.009627,0.744844


In [4]:
# Full list of column names (the rich display above elides the middle columns).
print(list(gto.columns))

['Karmn', 'SpT', 'SpTnum', 'Teff_K', 'eTeff_K', 'logg', 'elogg', '[Fe/H]', 'e[Fe/H]', 'L_Lsol', 'eL_Lsol', 'R_Rsol', 'eR_Rsol', 'M_Msol', 'eM_Msol', 'muRA_masa-1', 'emuRA_masa-1', 'muDE_masa-1', 'emuDE_masa-1', 'pi_mas', 'epi_mas', 'd_pc', 'ed_pc', 'Vr_kms-1', 'eVr_kms-1', 'ruwe', 'U_kms-1', 'eU_kms-1', 'V_kms-1', 'eV_kms-1', 'W_kms-1', 'eW_kms-1', 'sa_m/s/a', 'esa_m/s/a', 'Pop', 'vsini_flag', 'vsini_kms-1', 'P_d', 'pEWHalpha_A', 'epEWHalpha_A', 'Activity', 'FUV_mag', 'eFUV_mag', 'NUV_mag', 'eNUV_mag', 'u_mag', 'eu_mag', 'BT_mag', 'eBT_mag', 'B_mag', 'eB_mag', 'BP_mag', 'eBP_mag', 'g_mag', 'eg_mag', 'VT_mag', 'eVT_mag', 'V_mag', 'eV_mag', 'Ra_mag', 'r_mag', 'er_mag', 'GG_mag', 'eGG_mag', 'i_mag', 'ei_mag', 'RP_mag', 'eRP_mag', 'IN_mag', 'J_mag', 'eJ_mag', 'H_mag', 'eH_mag', 'Ks_mag', 'eKs_mag', 'QFlag_2M', 'W1_mag', 'eW1_mag', 'W2_mag', 'eW2_mag', 'W3_mag', 'eW3_mag', 'W4_mag', 'eW4_mag', 'QFlag_WISE', 'Multiplicity', 'Planet', 'PlanetNum', 'Teff_min_K', 'Teff_max_K', 'logg_min', 'logg

Keep only the records flagged with `VALID_RECORD == True`; the discarded records are displayed below for reference.

In [5]:
# Split the table on the VALID_RECORD flag. Each side is an independent copy so that
# later edits never write back into `gto`.
valid_flag = gto['VALID_RECORD']
gto_valid = gto.loc[valid_flag == True].copy()
gto_invalid = gto.loc[valid_flag == False].copy()
# Show the discarded records.
gto_invalid

Unnamed: 0,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],L_Lsol,...,freq_signif_ratio_31,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw
92,J17572+707,M7.5 V,7.5,2600.0,50.0,5.0,0.5,0.0,,0.001092,...,,,,,,,,,,
99,J17198+417,M2.5 V,2.5,3540.0,20.0,4.96,0.12,-0.2,0.07,0.018012,...,,,,,,,,,,
115,J16102-193,M3.0 V,3.0,3575.0,55.0,4.48,0.19,-0.02,0.07,0.102624,...,,,,,,,,,,
270,J06318+414,M5.0 V,5.0,3084.0,13.0,4.88,0.07,-0.03,0.05,0.01338,...,,,,,,,,,,
273,J06103+821,M2.0 V,2.0,3554.0,20.0,4.95,0.13,-0.16,0.07,0.02409,...,,,,,,,,,,
309,J04173+088,M4.5 V,4.5,3100.0,50.0,5.5,0.25,0.0,,0.00656,...,,,,,,,,,,
344,J01352-072,M4.0 V,4.0,3052.0,12.0,4.73,0.15,-0.02,0.08,0.04775,...,,,,,,,,,,


Reset the indices of valid values, but keep them for later reference to the original CARMENES GTO list.

In [6]:
# reset_index() returns a new frame with a fresh 0..n-1 index and stores the previous
# row labels in an 'index' column (kept as the reference to the original GTO list).
# NOTE: this cell is not idempotent - running it a second time adds another column
# ('level_0') because 'index' is already taken.
gto_valid = gto_valid.reset_index(drop=False)
gto_valid.tail()

Unnamed: 0,index,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],...,freq_signif_ratio_31,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw
351,358,J00286-066,M4.0 V,4.0,3419.0,28.0,4.81,0.08,-0.11,0.08,...,0.969714,0.153847,0.224804,0.004742,1.361729,1.156685,1.007167,1.141149,1.237745,0.41465
352,359,J00184+440,M3.5 V,3.5,3318.0,53.0,5.2,0.11,-0.36,0.17,...,0.80128,0.090236,-0.465545,0.006788,1.015351,2.800953,0.654925,0.868544,0.743569,0.541385
353,360,J00183+440,M1.0 V,1.0,3603.0,24.0,4.99,0.14,-0.52,0.11,...,0.586709,0.322146,-1.425685,0.004074,1.152054,1.327331,0.797213,0.864388,1.073876,0.692976
354,361,J00067-075,M5.5 V,5.5,3169.0,53.0,5.2,0.16,-0.15,0.22,...,0.862849,0.540831,-0.588889,-0.000143,1.013064,1.286838,0.955929,1.203872,1.116822,0.58144
355,362,J00051+457,M1.0 V,1.0,3773.0,16.0,5.07,0.08,-0.04,0.05,...,0.941262,0.019376,-0.843306,0.001784,1.22631,1.162811,1.095399,1.153078,1.321419,0.334245


In [7]:
# (rows, columns) of the valid records; the columns include the 'index' column
# added by reset_index().
gto_valid.shape

(356, 209)

**OBSERVATION:** we have 356 stars, which is slightly different from the number quoted in the document (355 stars). **TODO:** check where the difference comes from.

### Keep only the relevant features for the classifier

We will later reattach all the features that we drop (`Karmn`, `SpT`, etc.).

In [8]:
# Feature matrix for the classifier: all rows, only the columns named in
# KEEP_CS_FEATURES (in that order), as an independent copy.
X_new = gto_valid.loc[:, KEEP_CS_FEATURES].copy()
X_new

Unnamed: 0,all_times_nhist_numpeaks,all_times_nhist_peak1_bin,all_times_nhist_peak2_bin,all_times_nhist_peak3_bin,all_times_nhist_peak4_bin,all_times_nhist_peak_1_to_2,all_times_nhist_peak_1_to_3,all_times_nhist_peak_1_to_4,all_times_nhist_peak_2_to_3,all_times_nhist_peak_2_to_4,...,period_fast,qso_log_chi2_qsonu,qso_log_chi2nuNULL_chi2nu,skew,std,stetson_j,stetson_k,weighted_average,fold2P_slope_10percentile,fold2P_slope_90percentile
0,10.0,9.0,12.0,17.0,4.0,1.466667,1.466667,1.692308,1.000000,1.153846,...,2428.395775,13.396114,-0.020659,0.081246,1932.249620,17069.155147,1.086586,-78.425440,-2.449361e+03,3.371539e+03
1,8.0,0.0,3.0,18.0,27.0,1.666667,1.666667,2.083333,1.000000,1.250000,...,2463.270488,10.820810,0.193463,-1.309507,384.241795,2870.806026,0.853169,-181.806843,-1.324874e-02,-1.313052e-02
2,15.0,13.0,16.0,23.0,30.0,1.000000,1.000000,1.000000,1.000000,1.000000,...,148.594904,4.761606,0.461005,0.390684,35.983398,300.546520,0.997149,-18.363864,,
3,10.0,10.0,22.0,12.0,31.0,1.071111,1.095455,4.381818,1.022727,4.090909,...,307.841822,2.179387,0.105766,-0.772164,4.546157,32.348223,0.877195,-0.856279,-2.454864e+01,4.191963e+01
4,7.0,8.0,16.0,24.0,33.0,1.200000,1.758341,5.038760,1.465284,4.198966,...,60.582537,2.099344,0.161328,0.150726,3.498774,26.151409,0.937809,-2.082269,-1.351914e+01,1.367290e+01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,9.0,10.0,12.0,36.0,26.0,1.506849,1.864407,3.055556,1.237288,2.027778,...,1445.212061,2.342356,0.156405,-0.833435,3.473389,27.390106,0.981612,4.777301,-2.855644e-07,4.746087e-07
352,13.0,7.0,11.0,9.0,3.0,1.000000,1.159763,1.185006,1.159763,1.185006,...,1807.485713,2.730811,0.332126,-0.138383,3.874337,28.959831,0.936082,-8.425670,-2.865074e-02,2.842988e-02
353,10.0,10.0,19.0,24.0,26.0,5.729958,6.722772,7.760000,1.173267,1.354286,...,54.425670,2.516360,0.321295,-1.101207,3.595080,25.281666,0.870784,-2.714845,-9.836043e-01,5.057443e-01
354,6.0,29.0,27.0,16.0,45.0,1.262032,1.627586,1.918699,1.289655,1.520325,...,129.728365,1.972367,0.214226,-0.117214,3.591741,27.880712,0.965424,1.660685,-2.642510e+01,4.173763e+01


### Store the ML dataset for real data

Notice that this one, unlike the synthetic dataset, has no labels.

In [9]:
# Persist the unlabelled feature matrix; the row index is not written.
real_processed_path = OUT_FOLDER + REAL_PROCESSED_DS
X_new.to_csv(real_processed_path, sep=',', decimal='.', index=False)

## Load the fitted model

In [10]:
# Load the fitted model object saved earlier with joblib.
# NOTE: joblib files are pickle-based, so loading executes whatever the file contains -
# only load model files that come from a trusted source.
model_path = OUT_MODELS_FOLDER + "Best_Model_After_RandSearchCV.joblib"
clf = load(model_path)

In [11]:
# Display the estimator (with its hyperparameters) that will be used for prediction.
# NOTE(review): `clf` exposes `best_estimator_`, so it is presumably a fitted
# hyperparameter-search object rather than a bare classifier - confirm.
clf.best_estimator_

HistGradientBoostingClassifier(learning_rate=0.08887773751235287, max_iter=53,
                               max_leaf_nodes=None, min_samples_leaf=58,
                               random_state=11)

## Predicting the probability of pulsation for the CARMENES stars

In [12]:
# Class-membership probabilities for every valid star: one row per row of X_new,
# one column per class (column order follows the estimator's `classes_`).
best_model = clf.best_estimator_
predict_proba = best_model.predict_proba(X_new)
predict_proba[:10]

array([[0.50220235, 0.49779765],
       [0.60039452, 0.39960548],
       [0.29647641, 0.70352359],
       [0.15677175, 0.84322825],
       [0.24998208, 0.75001792],
       [0.18350945, 0.81649055],
       [0.13967901, 0.86032099],
       [0.09517852, 0.90482148],
       [0.16204147, 0.83795853],
       [0.22727351, 0.77272649]])

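The columns of `predict_proba` follow the order of the classifier's `classes_` attribute. We take the second column (index 1) as the probability of being a pulsating star, assuming the pulsating class is the second entry of `classes_`.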

In [13]:
# Sanity check: one row per star in X_new, one column per class.
predict_proba.shape

(356, 2)

In [14]:
# First ten values of the second probability column (single indexing step).
predict_proba[:10, 1]

array([0.49779765, 0.39960548, 0.70352359, 0.84322825, 0.75001792,
       0.81649055, 0.86032099, 0.90482148, 0.83795853, 0.77272649])

In [15]:
# Independent 1-D copy of the second probability column.
# NOTE(review): the next cell rebinds `pulsating_proba` to a DataFrame built directly
# from `predict_proba`, so this array is not used afterwards.
pulsating_proba = predict_proba[:, 1].copy()
pulsating_proba[:10]

array([0.49779765, 0.39960548, 0.70352359, 0.84322825, 0.75001792,
       0.81649055, 0.86032099, 0.90482148, 0.83795853, 0.77272649])

In [16]:
# One-column frame holding the probability of the second class (column 1 of
# predict_proba). The index is taken from X_new - the rows the predictions were
# computed for - so the column-wise pd.concat with gto_valid further down aligns
# row-for-row by label, instead of silently relying on both frames happening to
# carry a default 0..n-1 index (a mismatch would produce NaN-padded rows).
pulsating_proba = pd.DataFrame(
    data={'Pulsation_probability': predict_proba[:, 1]},
    index=X_new.index,
)
pulsating_proba.head()

Unnamed: 0,Pulsation_probability
0,0.497798
1,0.399605
2,0.703524
3,0.843228
4,0.750018


### Append the predicted probabilities to the CARMENES GTO data

In [17]:
# Attach the probability column to the right of the GTO data; rows are matched on
# their index labels.
gto_valid_res = pd.concat(
    [gto_valid, pulsating_proba],
    axis='columns',
)
gto_valid_res

Unnamed: 0,index,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],...,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw,Pulsation_probability
0,0,J23585+076,M3.0 V,3.0,3496.0,20.0,4.97,0.08,-0.06,0.06,...,0.066449,-36.935032,-3.130905,3.234152,7.193835,0.098248,0.252732,0.063294,0.079189,0.497798
1,1,J23556-061,M2.5 V,2.5,3639.0,30.0,4.84,0.14,-0.02,0.08,...,0.006089,-51.984633,-0.718425,5.582731,1.000000,0.143169,0.143169,0.053713,0.038826,0.399605
2,2,J23548+385,M4.0 V,4.0,3263.0,16.0,5.13,0.10,-0.55,0.09,...,0.343260,-2.149188,-0.054085,,1.077280,1.567387,1.073014,1.917366,0.169913,0.703524
3,3,J23505-095,M4.0 V,4.0,3377.0,34.0,4.83,0.10,-0.08,0.10,...,0.498257,-2.800944,0.000096,0.793332,1.495594,1.052292,1.496514,0.894181,0.684958,0.843228
4,4,J23492+024,M1.0 V,1.0,3573.0,23.0,4.94,0.13,-0.55,0.08,...,0.598726,-0.009706,0.001959,0.930585,1.783765,0.723985,1.192476,1.009627,0.744844,0.750018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,358,J00286-066,M4.0 V,4.0,3419.0,28.0,4.81,0.08,-0.11,0.08,...,0.153847,0.224804,0.004742,1.361729,1.156685,1.007167,1.141149,1.237745,0.414650,0.672591
352,359,J00184+440,M3.5 V,3.5,3318.0,53.0,5.20,0.11,-0.36,0.17,...,0.090236,-0.465545,0.006788,1.015351,2.800953,0.654925,0.868544,0.743569,0.541385,0.862921
353,360,J00183+440,M1.0 V,1.0,3603.0,24.0,4.99,0.14,-0.52,0.11,...,0.322146,-1.425685,0.004074,1.152054,1.327331,0.797213,0.864388,1.073876,0.692976,0.849179
354,361,J00067-075,M5.5 V,5.5,3169.0,53.0,5.20,0.16,-0.15,0.22,...,0.540831,-0.588889,-0.000143,1.013064,1.286838,0.955929,1.203872,1.116822,0.581440,0.864630


### Rename the `index` column

In [18]:
# Give the preserved row labels a self-explanatory name. Rebinding (rather than
# inplace=True) keeps the cell a plain "input -> output" step.
gto_valid_res = gto_valid_res.rename(columns={'index': 'Original_index'})
gto_valid_res

Unnamed: 0,Original_index,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],...,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw,Pulsation_probability
0,0,J23585+076,M3.0 V,3.0,3496.0,20.0,4.97,0.08,-0.06,0.06,...,0.066449,-36.935032,-3.130905,3.234152,7.193835,0.098248,0.252732,0.063294,0.079189,0.497798
1,1,J23556-061,M2.5 V,2.5,3639.0,30.0,4.84,0.14,-0.02,0.08,...,0.006089,-51.984633,-0.718425,5.582731,1.000000,0.143169,0.143169,0.053713,0.038826,0.399605
2,2,J23548+385,M4.0 V,4.0,3263.0,16.0,5.13,0.10,-0.55,0.09,...,0.343260,-2.149188,-0.054085,,1.077280,1.567387,1.073014,1.917366,0.169913,0.703524
3,3,J23505-095,M4.0 V,4.0,3377.0,34.0,4.83,0.10,-0.08,0.10,...,0.498257,-2.800944,0.000096,0.793332,1.495594,1.052292,1.496514,0.894181,0.684958,0.843228
4,4,J23492+024,M1.0 V,1.0,3573.0,23.0,4.94,0.13,-0.55,0.08,...,0.598726,-0.009706,0.001959,0.930585,1.783765,0.723985,1.192476,1.009627,0.744844,0.750018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,358,J00286-066,M4.0 V,4.0,3419.0,28.0,4.81,0.08,-0.11,0.08,...,0.153847,0.224804,0.004742,1.361729,1.156685,1.007167,1.141149,1.237745,0.414650,0.672591
352,359,J00184+440,M3.5 V,3.5,3318.0,53.0,5.20,0.11,-0.36,0.17,...,0.090236,-0.465545,0.006788,1.015351,2.800953,0.654925,0.868544,0.743569,0.541385,0.862921
353,360,J00183+440,M1.0 V,1.0,3603.0,24.0,4.99,0.14,-0.52,0.11,...,0.322146,-1.425685,0.004074,1.152054,1.327331,0.797213,0.864388,1.073876,0.692976,0.849179
354,361,J00067-075,M5.5 V,5.5,3169.0,53.0,5.20,0.16,-0.15,0.22,...,0.540831,-0.588889,-0.000143,1.013064,1.286838,0.955929,1.203872,1.116822,0.581440,0.864630


### Sort the results by probability, in descending order

In [19]:
# Most probable pulsators first.
gto_valid_res = gto_valid_res.sort_values(
    by='Pulsation_probability',
    ascending=False,
)
gto_valid_res

Unnamed: 0,Original_index,Karmn,SpT,SpTnum,Teff_K,eTeff_K,logg,elogg,[Fe/H],e[Fe/H],...,freq_varrat,freq_y_offset,linear_trend,medperc90_2p_p,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,scatter_res_raw,Pulsation_probability
204,207,J10167-119,M3.0 V,3.0,3552.0,20.0,4.84,0.09,-0.06,0.06,...,0.010699,-0.038075,-0.000176,0.001038,0.669930,1.484074,0.734956,2.450723,0.047777,0.957453
221,224,J09286-121,M2.5 V,2.5,3500.0,50.0,,,,,...,0.013887,0.383039,-0.049409,1.393174,1.866954,1.298225,1.837091,1.340171,0.135780,0.946779
80,80,J18221+063,M4.0 V,4.0,3397.0,28.0,4.89,0.09,-0.35,0.11,...,0.002324,-0.197556,0.000298,2.484991,0.555287,1.731505,0.612871,2.727270,0.056100,0.939559
257,260,J07051-101,M5.0 V,5.0,3100.0,50.0,4.00,0.25,0.00,,...,0.580446,30.118283,-0.480066,0.841980,1.000000,15.942813,15.942813,4.491174,4.068191,0.924887
193,196,J10482-113,M6.5 V,6.5,3029.0,25.0,5.33,0.16,-0.08,0.17,...,0.074469,-0.705783,-0.000918,1.022091,1.076621,1.253899,1.502272,1.664260,0.615618,0.922059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,162,J12350+098,M2.5 V,2.5,3556.0,16.0,4.82,0.07,-0.19,0.07,...,0.027689,0.162270,0.017002,4.633889,0.931735,2.281189,2.418441,1.636822,0.033193,0.468642
203,206,J10182-204,M4.5 V+,4.5,3000.0,50.0,4.50,0.25,0.00,,...,0.726871,-516.224275,-44.914882,1.102812,0.397013,1.896560,0.472488,2.312924,0.813147,0.463239
135,138,J14155+046,M5.0 V+,5.0,3100.0,50.0,4.00,0.25,0.00,,...,0.352764,323.146884,32.068229,1.264292,4.791896,0.294916,0.666503,0.272728,0.446721,0.447705
197,200,J10354+694,M3.5 V+,3.5,3300.0,50.0,5.50,0.25,0.00,,...,0.304887,107.256277,2.925842,0.019910,0.427790,0.576089,0.275770,1.235865,0.163294,0.403136


### Save the result for later reference

In [20]:
# Write the sorted results table; the (now shuffled) row index is not written -
# the original position is available in the 'Original_index' column.
gto_valid_res.to_csv(
    PREDICTION_RESULTS_FILE,
    sep=',',
    decimal='.',
    index=False,
)

### Export tables to Latex

In [21]:
# List every column of the results table to choose the ones to export.
print(list(gto_valid_res.columns))

['Original_index', 'Karmn', 'SpT', 'SpTnum', 'Teff_K', 'eTeff_K', 'logg', 'elogg', '[Fe/H]', 'e[Fe/H]', 'L_Lsol', 'eL_Lsol', 'R_Rsol', 'eR_Rsol', 'M_Msol', 'eM_Msol', 'muRA_masa-1', 'emuRA_masa-1', 'muDE_masa-1', 'emuDE_masa-1', 'pi_mas', 'epi_mas', 'd_pc', 'ed_pc', 'Vr_kms-1', 'eVr_kms-1', 'ruwe', 'U_kms-1', 'eU_kms-1', 'V_kms-1', 'eV_kms-1', 'W_kms-1', 'eW_kms-1', 'sa_m/s/a', 'esa_m/s/a', 'Pop', 'vsini_flag', 'vsini_kms-1', 'P_d', 'pEWHalpha_A', 'epEWHalpha_A', 'Activity', 'FUV_mag', 'eFUV_mag', 'NUV_mag', 'eNUV_mag', 'u_mag', 'eu_mag', 'BT_mag', 'eBT_mag', 'B_mag', 'eB_mag', 'BP_mag', 'eBP_mag', 'g_mag', 'eg_mag', 'VT_mag', 'eVT_mag', 'V_mag', 'eV_mag', 'Ra_mag', 'r_mag', 'er_mag', 'GG_mag', 'eGG_mag', 'i_mag', 'ei_mag', 'RP_mag', 'eRP_mag', 'IN_mag', 'J_mag', 'eJ_mag', 'H_mag', 'eH_mag', 'Ks_mag', 'eKs_mag', 'QFlag_2M', 'W1_mag', 'eW1_mag', 'W2_mag', 'eW2_mag', 'W3_mag', 'eW3_mag', 'W4_mag', 'eW4_mag', 'QFlag_WISE', 'Multiplicity', 'Planet', 'PlanetNum', 'Teff_min_K', 'Teff_max_K',

In [22]:
# Columns exported to the LaTeX tables below (both the top-10 table and the full list).
INTEREST_COLUMNS = ['Karmn', 'InstBand_ranged', 'Pulsation_probability']

#### Top 10

In [23]:
# LaTeX source for the ten highest-probability stars (table is already sorted).
top10 = gto_valid_res.loc[:, INTEREST_COLUMNS].head(10)
print(top10.to_latex(index=False))

\begin{tabular}{llr}
\toprule
     Karmn & InstBand\_ranged &  Pulsation\_probability \\
\midrule
J10167-119 &     He3-burning &               0.957453 \\
J09286-121 &            none &               0.946779 \\
J18221+063 &     He3-burning &               0.939559 \\
J07051-101 &            none &               0.924887 \\
J10482-113 &            none &               0.922059 \\
J07386-212 &     He3-burning &               0.919774 \\
J15369-141 &     He3-burning &               0.918155 \\
J16462+164 &     He3-burning &               0.913001 \\
J11511+352 &     He3-burning &               0.910444 \\
J22298+414 &     He3-burning &               0.910404 \\
\bottomrule
\end{tabular}



#### All stars

This one is sent to a file instead:

In [24]:
# Write the complete LaTeX longtable to LATEX_TABLE_OUT_FILE.
# `print(..., file=f)` emits exactly the text (including the trailing newline) that
# printing to a redirected sys.stdout would, but never reassigns the global stdout,
# so an exception raised while building the table cannot leave the notebook
# printing into a closed file.
with open(LATEX_TABLE_OUT_FILE, 'w') as f:
    print(gto_valid_res[INTEREST_COLUMNS].to_latex(index=False, longtable=True), file=f)
