# Making the table for the paper

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
data = pd.read_csv("../data/data.csv")

In [4]:
results = pd.read_csv("../data/total_results.csv")

In [5]:
results.keys()

Index(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'kepid', 'vx_inferred',
       'vx_inferred_errp', 'vx_inferred_errm', 'vx_inferred_err', 'vx_random',
       'vy_inferred', 'vy_inferred_errp', 'vy_inferred_errm',
       'vy_inferred_err', 'vy_random', 'vz_inferred', 'vz_inferred_errp',
       'vz_inferred_errm', 'vz_inferred_err', 'vz_random',
       'lndistance_inferred', 'lndistance_inferred_errp',
       'lndistance_inferred_errm', 'lndistance_inferred_err',
       'lndistance_random', 'vxvx_covar', 'vxvy_covar', 'vxvz_covar',
       'vxlnd_covar', 'vyvy_covar', 'vyvz_covar', 'vylnd_covar', 'vzvz_covar',
       'vzlnd_covar', 'lndlnd_covar'],
      dtype='object')

In [16]:
df = pd.merge(data, results, on="kepid", how="right")
print(np.shape(data), np.shape(results), np.shape(df))
m = np.isfinite(df.kepid.values)
df = df.iloc[m]

(148591, 180) (148591, 34) (148591, 213)


In [166]:
from math import floor
from tqdm import trange

def get_sig(x):
    return abs(int(floor(np.log10(abs(x)))))

def round_err_and_val(val, err):
    sig = get_sig(err) # Get number of dec. places in the uncertainty for 1 sig. fig.
    rounded_err = round(err, sig)
    sig = get_sig(rounded_err) # Repeat in case rounding reduces dec. places.
    return round(val, sig), rounded_err

def round_list(vals, errs):
    val_list, err_list = [np.zeros(len(df)) for i in range(2)]
    for i in trange(len(vals)):
        val, err = round_err_and_val(vals[i], errs[i])
        val_list[i] = val
        err_list[i] = err
    return val_list, err_list

def round_list_nans(vals, errs):
    val_list, err_list = [np.zeros(len(df)) for i in range(2)]
    for i in trange(len(vals)):
        if np.isfinite(vals[i]) and np.isfinite(errs[i]):
            val, err = round_err_and_val(vals[i], errs[i])
            val_list[i] = val
            err_list[i] = err
        else:
            val_list[i] = vals[i]
            err_list[i] = errs[i]
    return val_list, err_list

In [131]:
ra, ra_error = round_list(df.ra.values, df.ra_error.values)

100%|██████████| 148590/148590 [00:01<00:00, 78816.13it/s]


In [132]:
dec, dec_error = round_list(df.dec.values, df.dec_error.values)

100%|██████████| 148590/148590 [00:01<00:00, 80119.96it/s]


In [133]:
parallax, parallax_error = round_list(df.parallax.values, df.parallax_error.values)

100%|██████████| 148590/148590 [00:01<00:00, 79217.74it/s]


In [156]:
distance = np.array([round(i, 1) for i in df.r_med_photogeo.values])
distance_lo = np.array([round(i, 1) for i in df.r_lo_photogeo.values])
distance_hi = np.array([round(i, 1) for i in df.r_hi_photogeo.values])

In [152]:
kepid = np.array([int(i) for i in df.kepid.values])

In [155]:
source_id = np.array([int(i) for i in df.source_id.values])

In [157]:
pmra, pmra_error = round_list(df.pmra.values, df.pmra_error.values)

100%|██████████| 148590/148590 [00:01<00:00, 77266.03it/s]


In [158]:
pmdec, pmdec_error = round_list(df.pmdec.values, df.pmdec_error.values)

100%|██████████| 148590/148590 [00:01<00:00, 79527.73it/s]


In [167]:
dr2_radial_velocity, dr2_radial_velocity_error = round_list_nans(df.dr2_radial_velocity.values,
                                                                 df.dr2_radial_velocity_error.values)

100%|██████████| 148590/148590 [00:00<00:00, 237060.16it/s]


In [172]:
apogee_corrected_rv, APOGEE_RV_ERR = round_list_nans(df.apogee_corrected_rv.values,
                                                df.APOGEE_RV_ERR.values)

100%|██████████| 148590/148590 [00:00<00:00, 328974.14it/s]


In [173]:
lamost_corrected_rv, LAMOST_RV_ERR = round_list_nans(df.lamost_corrected_rv.values,
                                                df.LAMOST_RV_ERR.values)

100%|██████████| 148590/148590 [00:00<00:00, 290126.31it/s]


In [174]:
vx_inferred, vx_inferred_err = round_list(df.vx_inferred.values,
                                          df.vx_inferred_err.values)

100%|██████████| 148590/148590 [00:00<00:00, 155607.76it/s]


In [175]:
vy_inferred, vy_inferred_err = round_list(df.vy_inferred.values,
                                          df.vy_inferred_err.values)

100%|██████████| 148590/148590 [00:01<00:00, 79067.22it/s]


In [176]:
vz_inferred, vz_inferred_err = round_list(df.vz_inferred.values,
                                          df.vz_inferred_err.values)

100%|██████████| 148590/148590 [00:00<00:00, 150138.53it/s]


In [186]:
def sig_figs(x, sig_figs=2):
    x_arr = np.zeros(len(x))
    for i in trange(len(x)):
        if np.isfinite(x[i]):
            x_arr[i] = round(x[i], sig_figs)
        else:
            x_arr[i] = x[i]
    return x_arr

In [187]:
vx = sig_figs(df.vx.values)
vy = sig_figs(df.vy.values)
vz = sig_figs(df.vz.values)


100%|██████████| 148590/148590 [00:00<00:00, 311422.58it/s]
100%|██████████| 148590/148590 [00:00<00:00, 313267.52it/s]
100%|██████████| 148590/148590 [00:00<00:00, 319729.27it/s]


In [194]:
vxvy_covar = sig_figs(df.vxvy_covar.values)
vxvz_covar = sig_figs(df.vxvz_covar.values)
vxlnd_covar = sig_figs(df.vxlnd_covar.values)
vyvz_covar = sig_figs(df.vyvz_covar.values)
vylnd_covar = sig_figs(df.vylnd_covar.values)
vzlnd_covar = sig_figs(df.vzlnd_covar.values)

100%|██████████| 148590/148590 [00:00<00:00, 161694.01it/s]
100%|██████████| 148590/148590 [00:00<00:00, 161850.68it/s]
100%|██████████| 148590/148590 [00:00<00:00, 166007.49it/s]
100%|██████████| 148590/148590 [00:00<00:00, 163619.43it/s]
100%|██████████| 148590/148590 [00:00<00:00, 166976.06it/s]
100%|██████████| 148590/148590 [00:00<00:00, 164862.44it/s]


In [195]:
vxvz

array([24.14, 25.82, 50.88, ..., 43.96, 26.15, 29.14])

In [196]:
table = pd.DataFrame(dict({"kic_id": kepid,
                           "source_id": source_id,
                           "ra": ra, "ra_error": ra_error,
                           "dec": dec, "dec_error": dec_error,
                           "parallax": parallax, "parallax_error": parallax_error,
                           "r_med_photogeo": distance,
                           "r_lo_photogeo": distance_lo,
                           "r_hi_photogeo": distance_hi,
                           "pmra": pmra, "pmra_error": pmra,
                           "pmdec": pmdec, "pmdec_error": pmdec,
                           "gaia_dr2_rv": dr2_radial_velocity,
                           "gaia_dr2_rv_error": dr2_radial_velocity_error,
                           "apogee_rv": apogee_corrected_rv, "apogee_rv_error": APOGEE_RV_ERR,
                           "lamost_rv": lamost_corrected_rv, "lamost_rv_error": LAMOST_RV_ERR,
                           "vx_calc": vx, "vx_inferred": vx_inferred, "vx_inferred_error": vx_inferred_err,
                           "vy_calc": vy, "vy_inferred": vy_inferred, "vy_inferred_error": vy_inferred_err,
                           "vz_calc": vz, "vz_inferred": vz_inferred, "vz_inferred_error": vz_inferred_err,
                           "vxvy_covar": vxvy_covar,
                           "vxvz_covar": vxvz_covar,
                           "vxlnd_covar": vxlnd_covar,
                           "vyvz_covar": vyvz_covar,
                           "vylnd_covar": vylnd_covar,
                           "vzlnd_covar": vzlnd_covar
                          }))

table.to_csv("../data/final_paper_table.csv")

In [197]:
len(table)

148590

In [24]:
[col for col in df.columns if "lam" in col]

['gaia_lamost_angular_separation_arcsec', 'lamost_corrected_rv']

In [16]:
[k for k in df.keys()]

['Unnamed: 0_x',
 'kepid',
 'tm_designation',
 'kepmag',
 'teff',
 'teff_err1',
 'teff_err2',
 'teff_prov',
 'logg',
 'logg_err1',
 'logg_err2',
 'logg_prov',
 'feh',
 'feh_err1',
 'feh_err2',
 'feh_prov',
 'radius',
 'radius_err1',
 'radius_err2',
 'mass',
 'mass_err1',
 'mass_err2',
 'prov_sec',
 'nconfp',
 'nkoi',
 'ntce',
 'jmag',
 'hmag',
 'kmag',
 'planet?',
 'solution_id',
 'designation',
 'source_id',
 'random_index',
 'ref_epoch',
 'ra',
 'ra_error',
 'dec',
 'dec_error',
 'parallax',
 'parallax_error',
 'parallax_over_error',
 'pm',
 'pmra',
 'pmra_error',
 'pmdec',
 'pmdec_error',
 'ra_dec_corr',
 'ra_parallax_corr',
 'ra_pmra_corr',
 'ra_pmdec_corr',
 'dec_parallax_corr',
 'dec_pmra_corr',
 'dec_pmdec_corr',
 'parallax_pmra_corr',
 'parallax_pmdec_corr',
 'pmra_pmdec_corr',
 'astrometric_n_obs_al',
 'astrometric_n_obs_ac',
 'astrometric_n_good_obs_al',
 'astrometric_n_bad_obs_al',
 'astrometric_gof_al',
 'astrometric_chi2_al',
 'astrometric_excess_noise',
 'astrometric_exce