In [1]:
import numpy as np
import pandas as pd
from os import path

from calc_stellar_params import lum_eqn, lum_unc_eqn, teff_eqn, teff_unc_eqn

In [2]:
# Data directory:
# Every input/output file in this notebook is located with path.join(data_dir, <file name>).
#data_dir = "~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/"
data_dir = "~/OneDrive/SEPHI_data/"
# Number of leading lines skipped when reading the NASA EA csv (passed to pd.read_csv as skiprows).
# NOTE(review): presumably the length of the comment header of this particular download --
# confirm whenever the input file is replaced.
header_length = 116

In [3]:
# Columns of the NASA EA table used in this notebook:
# pl_orbper = orbital period [days]
# pl_orbsmax = orbit semi-major axis [au]
# Excluded "st_spectype" from the download due to csv formatting.
# Other columns not read here: "sy_dist", "sy_plx", "sy_gaiamag"
nasa_ea_columns = [
    "pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod",
    "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens", "pl_orbeccen", "pl_eqt", "pl_orbincl",
    "st_teff", "st_tefferr1", "st_tefferr2",
    "st_rad", "st_raderr1", "st_raderr2",
    "st_mass", "st_met",
    "st_lum", "st_lumerr1", "st_lumerr2",
    "st_logg",
    "st_age", "st_ageerr1", "st_ageerr2",
]

# Read the NASA EA data, then:
#  - drop the exoplanets whose stars haven't been observed by Gaia (no gaia_id);
#  - rename gaia_id to 'designation', which is what it is called in Gaia tables.
#    The designation is "Gaia" + space + DR no. + space + Gaia source_id,
#    e.g. Gaia DR2 3946945413106333696
exoplanets = (
    pd.read_csv(
        path.join(data_dir, "NASA_EA_2022_02_09.csv"),
        skiprows=header_length,
        usecols=nasa_ea_columns,
    )
    .dropna(subset=["gaia_id"])
    .rename(columns={"gaia_id": "designation"})
)

# Add a new column called 'source_id', as listed in Gaia tables.
# The source_id is the number at the end of the designation, e.g. 3946945413106333696
designation_parts = exoplanets["designation"].str.rsplit(" ", n=1, expand=True)
exoplanets["source_id"] = designation_parts[1].astype("int64")

# Remove the space in 'hostname' (e.g. 11 Com -> 11Com):
#exoplanets["hostname"] = exoplanets["hostname"].str.replace(" ", "")

print("No. rows in NASA EA: ", exoplanets.shape[0])

No. rows in NASA EA:  4734


In [4]:
# Write the designations on their own to a csv
# (useful when using the designations to search for stars in Gaia):
designations_csv = path.join(data_dir, "designations.csv")
exoplanets[["designation"]].to_csv(designations_csv, index=False)

In [5]:
# Convert luminosities and uncs from log(solar) to solar:
st_lum_sol = 10**exoplanets["st_lum"] # to undo a log, base^x

# Converting the uncertainties (first-order error propagation):
# y = 10^x,
# dy/dx = 10^x * ln(10)
# y_err = dy/dx * x_err
# y_err = 10^x * ln(10) * x_err
# y_err = y * x_err * ln(10)
st_lum_solerr1 = np.multiply(st_lum_sol, exoplanets["st_lumerr1"]) * np.log(10)
st_lum_solerr2 = np.multiply(st_lum_sol, exoplanets["st_lumerr2"]) * np.log(10)
print(st_lum_sol)
print(st_lum_solerr1)
print(st_lum_solerr2)

# Comparing to the exact shift of the converted value:
# unc = 10^(x + x_err) - 10^(x)
# (x_err is added in both cases, so the sign of unc follows the sign of x_err)
unc1 = 10**(exoplanets["st_lum"] + exoplanets["st_lumerr1"]) - st_lum_sol
unc2 = 10**(exoplanets["st_lum"] + exoplanets["st_lumerr2"]) - st_lum_sol
print(unc1)
print(unc2)

# Rows where the two methods disagree by more than 10 (solar luminosities).
# np.where returns *positional* indices, while the index labels of these Series
# have gaps (rows were dropped from the table), so the rows must be selected
# with .iloc -- plain [] would look the positions up as labels.
l = np.where( (unc1 - st_lum_solerr1).abs() > 10 )
print(l)
print(st_lum_solerr1.iloc[l[0]])
print(unc1.iloc[l[0]])

# The two unc. calculation methods yield similar but not identical results:
# the propagated value is only the linear (first-order) term of the exact shift,
# so the difference grows with the size of x_err.

0       174.984669
1              NaN
2        57.942870
3              NaN
4              NaN
           ...    
4909           NaN
4910           NaN
4911           NaN
4912     63.095734
4913     69.023980
Name: st_lum, Length: 4734, dtype: float64
0       28.607113
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    32.107609
4913          NaN
Length: 4734, dtype: float64
0      -34.247953
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    -7.990581
4913          NaN
Length: 4734, dtype: float64
0       31.078322
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    41.858508
4913          NaN
Length: 4734, dtype: float64
0      -31.104811
1             NaN
2             NaN
3            

In [6]:
# Overwrite the log(solar) luminosity columns in the table with the converted values.
# NOTE: after this, "st_lum" holds converted values, so re-running the conversion
# above without reloading the table would apply 10** a second time.
exoplanets = exoplanets.assign(
    st_lum=st_lum_sol,
    st_lumerr1=st_lum_solerr1,
    st_lumerr2=st_lum_solerr2,
)

In [7]:
## Calculating the luminosity and its uncertainties where teff and radius are available

In [8]:
# NOTE(review): none of the names bound here (pi, sqrt, sigma_sb, L_sun, R_sun, sigma)
# is referenced by the other cells visible in this notebook; the luminosity / teff
# equations are imported from calc_stellar_params instead. Presumably leftovers --
# confirm before removing, and if kept move the imports to the first cell.
from math import pi, sqrt
from astropy.constants import sigma_sb, L_sun, R_sun
# Plain numeric value of astropy's Stefan-Boltzmann constant (units stripped).
sigma = sigma_sb.value

In [10]:
# Arrays for the luminosity and its +ve and -ve uncs., one entry per row of the
# table, holding NaN until a value is calculated:
n_rows = exoplanets.shape[0]
calc_lum = np.full(n_rows, np.nan)
calc_lumerr1 = np.full(n_rows, np.nan)
calc_lumerr2 = np.full(n_rows, np.nan)

# Rows with both the stellar teff and rad listed (needed by every condition below):
has_teff_and_rad = np.isfinite(exoplanets["st_teff"]) & np.isfinite(exoplanets["st_rad"])

# Condition 1: indices of exoplanets with stellar teff and rad listed:
c1 = np.where(has_teff_and_rad)

# Condition 2: indices of exoplanets with dT1 and dR1 listed as well:
c2 = np.where(
    has_teff_and_rad
    & np.isfinite(exoplanets["st_tefferr1"])
    & np.isfinite(exoplanets["st_raderr1"])
)

# Condition 3: indices of exoplanets with dT2 and dR2 listed as well:
c3 = np.where(
    has_teff_and_rad
    & np.isfinite(exoplanets["st_tefferr2"])
    & np.isfinite(exoplanets["st_raderr2"])
)

In [11]:
# Columns shared by all three calculations:
star_teff = exoplanets["st_teff"]
star_rad = exoplanets["st_rad"]

# Luminosities for the rows in c1 (teff and rad listed):
calc_lum[c1] = lum_eqn(star_teff.iloc[c1], star_rad.iloc[c1])

# +ve errors for the rows in c2 (dT1 and dR1 listed too):
calc_lumerr1[c2] = lum_unc_eqn(
    star_teff.iloc[c2],
    exoplanets["st_tefferr1"].iloc[c2],
    star_rad.iloc[c2],
    exoplanets["st_raderr1"].iloc[c2],
)

# -ve errors for the rows in c3 (dT2 and dR2 listed too):
calc_lumerr2[c3] = lum_unc_eqn(
    star_teff.iloc[c3],
    exoplanets["st_tefferr2"].iloc[c3],
    star_rad.iloc[c3],
    exoplanets["st_raderr2"].iloc[c3],
    positive_unc=False,
)

In [12]:
# Append the calculated luminosities and their errors to the NASA EA table
# (the "NEAc_" columns hold values calculated here, not values listed in the table):
exoplanets = exoplanets.assign(
    NEAc_lum=calc_lum,
    NEAc_lumerr1=calc_lumerr1,
    NEAc_lumerr2=calc_lumerr2,
)

In [None]:
#print(exoplanets)

In [14]:
# Arrays for teff and its +ve and -ve uncs., one entry per row of the table,
# holding NaN until a value is calculated:
n_rows = exoplanets.shape[0]
calc_teff = np.full(n_rows, np.nan)
calc_tefferr1 = np.full(n_rows, np.nan)
calc_tefferr2 = np.full(n_rows, np.nan)

# Rows with both the stellar lum and rad listed (needed by every condition below):
has_lum_and_rad = np.isfinite(exoplanets["st_lum"]) & np.isfinite(exoplanets["st_rad"])

# NOTE: c1, c2 and c3 are re-bound here; from this point on they refer to the
# lum/rad conditions, not the teff/rad ones used for the luminosity calculation.

# Condition 1: indices of exoplanets with stellar lum and rad listed:
c1 = np.where(has_lum_and_rad)
#print(c1[0].size)

# Condition 2: indices of exoplanets with dL1 and dR1 listed as well:
c2 = np.where(
    has_lum_and_rad
    & np.isfinite(exoplanets["st_lumerr1"])
    & np.isfinite(exoplanets["st_raderr1"])
)
#print(c2[0].size)

# Condition 3: indices of exoplanets with dL2 and dR2 listed as well:
c3 = np.where(
    has_lum_and_rad
    & np.isfinite(exoplanets["st_lumerr2"])
    & np.isfinite(exoplanets["st_raderr2"])
)
#print(c3[0].size)

In [15]:
# Columns shared by all three calculations:
star_lum = exoplanets["st_lum"]
star_rad = exoplanets["st_rad"]

# Effective temperatures for the rows in c1 (lum and rad listed):
calc_teff[c1] = teff_eqn(star_lum.iloc[c1], star_rad.iloc[c1])
#print(calc_teff[c1].size)

# +ve errors for the rows in c2 (dL1 and dR1 listed too):
calc_tefferr1[c2] = teff_unc_eqn(
    star_lum.iloc[c2],
    exoplanets["st_lumerr1"].iloc[c2],
    star_rad.iloc[c2],
    exoplanets["st_raderr1"].iloc[c2],
)

# -ve errors for the rows in c3 (dL2 and dR2 listed too):
calc_tefferr2[c3] = teff_unc_eqn(
    star_lum.iloc[c3],
    exoplanets["st_lumerr2"].iloc[c3],
    star_rad.iloc[c3],
    exoplanets["st_raderr2"].iloc[c3],
    positive_unc=False,
)

In [16]:
# Append the calculated effective temperatures and their errors to the NASA EA table
# (the "NEAc_" columns hold values calculated here, not values listed in the table):
exoplanets = exoplanets.assign(
    NEAc_teff=calc_teff,
    NEAc_tefferr1=calc_tefferr1,
    NEAc_tefferr2=calc_tefferr2,
)

In [None]:
#print(exoplanets)

In [17]:
# Renaming the columns that came from the NASA EA table:
# every column listed below loses its "st_" (if any) and gains the prefix "NEA_",
# e.g. st_teff -> NEA_teff, pl_orbper -> NEA_pl_orbper.
# Not renamed: "pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod"
cols = [
    "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens", "pl_orbeccen", "pl_eqt", "pl_orbincl",
    "st_teff", "st_tefferr1", "st_tefferr2",
    "st_rad", "st_raderr1", "st_raderr2",
    "st_mass", "st_met",
    "st_lum", "st_lumerr1", "st_lumerr2",
    "st_logg",
    "st_age", "st_ageerr1", "st_ageerr2",
]

# Remove all the "st_": NEA_ columns without a "pl_" refer to the star.
new_cols1 = [name.replace("st_", "") for name in cols]
print(new_cols1)
new_cols2 = ["NEA_" + str(name) for name in new_cols1]

# Map each old column name to its new name and apply it to the table:
cols_dict = dict(zip(cols, new_cols2))
exoplanets = exoplanets.rename(columns=cols_dict)
#print(exoplanets)

['pl_orbper', 'pl_orbsmax', 'pl_rade', 'pl_bmasse', 'pl_dens', 'pl_orbeccen', 'pl_eqt', 'pl_orbincl', 'teff', 'tefferr1', 'tefferr2', 'rad', 'raderr1', 'raderr2', 'mass', 'met', 'lum', 'lumerr1', 'lumerr2', 'logg', 'age', 'ageerr1', 'ageerr2']


In [18]:
# Write the processed exoplanets table to a csv in the data directory (row index not written):
processed_csv = path.join(data_dir, "NASA_EA_processed.csv")
exoplanets.to_csv(processed_csv, index=False)