In [1]:
import numpy as np
import pandas as pd

from os import path

In [2]:
# Data directory that the input csv is read from and the output csv is written to.
# Alternative location, kept for reference:
#data_dir = "~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/"
data_dir = "~/OneDrive/SEPHI_data/"
# Number of lines skipped at the top of the NASA EA csv (passed to read_csv as skiprows).
header_length = 116

In [3]:
# Load the NASA Exoplanet Archive (NASA EA) table, keeping only the columns used here.
# Units worth remembering:
#   pl_orbper  = orbital period [days]
#   pl_orbsmax = orbit semi-major axis [au]
exoplanets = (
    pd.read_csv(
        path.join(data_dir, "NASA_EA_2022_02_09.csv"),
        skiprows=header_length,
        usecols=[
            "pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod",
            "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens", "pl_orbeccen",
            "pl_eqt", "pl_orbincl",
            "st_teff", "st_tefferr1", "st_tefferr2",
            "st_rad", "st_raderr1", "st_raderr2",
            "st_mass", "st_met",
            "st_lum", "st_lumerr1", "st_lumerr2",
            "st_logg", "st_age", "st_ageerr1", "st_ageerr2",
            "sy_dist", "sy_plx", "sy_gaiamag",
        ],
    )
    # Discard exoplanets whose host star has no Gaia identifier:
    .dropna(subset=["gaia_id"])
    # Gaia tables call this identifier the 'designation':
    # "Gaia" + space + data release + space + source_id,
    # e.g. Gaia DR2 3946945413106333696
    .rename(columns={"gaia_id": "designation"})
)

# 'source_id' (as listed in Gaia tables) is the number at the end of the designation,
# e.g. 3946945413106333696
exoplanets["source_id"] = (
    exoplanets["designation"]
    .str.rsplit(" ", n=1, expand=True)[1]
    .astype("int64")
)

# Optionally remove the space in 'hostname' (e.g. 11 Com -> 11Com):
#exoplanets["hostname"] = exoplanets["hostname"].str.replace(" ", "")

print("No. rows in NASA EA: ", len(exoplanets))

No. rows in NASA EA:  4734


In [4]:
# Write the designations to their own csv
# (useful when using the designations to search for stars in Gaia):
exoplanets.to_csv(
    path.join(data_dir, "designations.csv"),
    columns=["designation"],
    index=False,
)

In [5]:
# Convert the stellar luminosities and their uncertainties from log10(L / L_sun) to L / L_sun.
st_lum_sol = 10**exoplanets["st_lum"]  # undo the log10: y = 10^x

# First-order (linear) propagation of the uncertainties:
#   y     = 10^x
#   dy/dx = 10^x * ln(10)
#   y_err = dy/dx * x_err = y * x_err * ln(10)
# The sign of x_err carries through, so st_lum_solerr2 keeps the sign of st_lumerr2.
st_lum_solerr1 = st_lum_sol * exoplanets["st_lumerr1"] * np.log(10)
st_lum_solerr2 = st_lum_sol * exoplanets["st_lumerr2"] * np.log(10)
print(st_lum_sol)
print(st_lum_solerr1)
print(st_lum_solerr2)

# Cross-check against the exact interval obtained by converting the shifted value:
#   unc = 10^(x + x_err) - 10^x
unc1 = 10**(exoplanets["st_lum"] + exoplanets["st_lumerr1"]) - st_lum_sol
unc2 = 10**(exoplanets["st_lum"] + exoplanets["st_lumerr2"]) - st_lum_sol
print(unc1)
print(unc2)

# Rows where the two upper-uncertainty estimates differ by more than 10 L_sun.
# np.where returns row POSITIONS, whereas the index labels of these Series have gaps
# (rows were dropped from `exoplanets`), so the rows must be selected with .iloc;
# plain [] would look the positions up as labels and show the wrong rows or raise KeyError.
l = np.where((unc1 - st_lum_solerr1).abs() > 10)
print(l)
print(st_lum_solerr1.iloc[l[0]])
print(unc1.iloc[l[0]])

# The two methods agree only to first order: the linear propagation is an approximation
# valid for small x_err, while 10^(x + x_err) - 10^x is exact, so the difference grows
# with the size of x_err.

0       174.984669
1              NaN
2        57.942870
3              NaN
4              NaN
           ...    
4909           NaN
4910           NaN
4911           NaN
4912     63.095734
4913     69.023980
Name: st_lum, Length: 4734, dtype: float64
0       28.607113
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    32.107609
4913          NaN
Length: 4734, dtype: float64
0      -34.247953
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    -7.990581
4913          NaN
Length: 4734, dtype: float64
0       31.078322
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
4909          NaN
4910          NaN
4911          NaN
4912    41.858508
4913          NaN
Length: 4734, dtype: float64
0      -31.104811
1             NaN
2             NaN
3            

In [6]:
# Overwrite the log-scale luminosity columns with the values converted to solar units:
exoplanets = exoplanets.assign(
    st_lum=st_lum_sol,
    st_lumerr1=st_lum_solerr1,
    st_lumerr2=st_lum_solerr2,
)

In [7]:
# TODO: (done in sephi_processing atm) where possible, calculate teff, L and R for stars without them listed.
# Luminosity and its uncertainty can be calculated wherever teff and rad are both available.

# Positional indices of the exoplanets with both a stellar teff and a stellar radius listed:
c1 = np.where(exoplanets["st_teff"].notna() & exoplanets["st_rad"].notna())
print(c1[0])

[   0    1    2 ... 4728 4732 4733]


In [8]:
from math import pi, sqrt
from astropy.constants import sigma_sb, L_sun, R_sun
# Plain numeric value of astropy's sigma_sb (Stefan-Boltzmann constant), used by the
# luminosity functions below. NOTE(review): presumably in SI units - confirm.
sigma = sigma_sb.value

In [9]:
def lum_eqn(T, R):
    """
    Luminosity from effective temperature and radius: L = 4*pi*sigma * R^2 * T^4.

    T = stellar effective temps [K]      (must provide .to_numpy(), e.g. a df column)
    R = stellar radii [solar radius]     (must provide .to_numpy(), e.g. a df column)
    returns numpy array of luminosities [solar lum]
    """
    temp_k = T.to_numpy()
    radius_m = R_sun.value * R.to_numpy()  # solar radii -> R_sun's own units

    lum_watts = (4*pi*sigma) * (radius_m**2 * temp_k**4)  # in W

    # Express in solar luminosities:
    return lum_watts / L_sun.value

In [10]:
def lum_unc(T, dT, R, dR, positive_unc=True):
    """
    Propagated uncertainty on L = 4*pi*sigma * R^2 * T^4:
        dL = 4*pi*sigma * sqrt( (2*R*T^4*dR)^2 + (4*R^2*T^3*dT)^2 )

    T  = stellar effective temps [K]     (must provide .to_numpy(), e.g. a df column)
    dT = uncs in T [K]                   (same)
    R  = stellar radii [solar radius]    (same)
    dR = uncs in R [solar rad]           (same)
    positive_unc = True returns dL as calculated (never negative, since both terms are
                   squared); False returns -dL
    returns numpy array of luminosity uncs [solar lum]
    """
    temp = T.to_numpy()
    temp_err = dT.to_numpy()
    radius = R.to_numpy() * R_sun.value
    radius_err = dR.to_numpy() * R_sun.value

    # The two partial-derivative terms, (dL/dR)*dR and (dL/dT)*dT without the 4*pi*sigma prefactor:
    radius_term = 2 * (radius * (temp**4 * radius_err))
    temp_term = 4 * (radius**2 * (temp**3 * temp_err))

    dL_solar = 4*pi*sigma * np.sqrt(radius_term**2 + temp_term**2) / L_sun.value

    # Flip the sign when the caller asked for the lower (negative) uncertainty:
    if positive_unc == False:
        return -dL_solar
    return dL_solar

In [11]:
# Luminosities [solar lum] calculated from teff and radius, with their upper (err1)
# and lower (err2, returned negative) uncertainties:
calc_lum = lum_eqn(T=exoplanets["st_teff"], R=exoplanets["st_rad"])
calc_lumerr1 = lum_unc(
    T=exoplanets["st_teff"],
    dT=exoplanets["st_tefferr1"],
    R=exoplanets["st_rad"],
    dR=exoplanets["st_raderr1"],
    positive_unc=True,
)
calc_lumerr2 = lum_unc(
    T=exoplanets["st_teff"],
    dT=exoplanets["st_tefferr2"],
    R=exoplanets["st_rad"],
    dR=exoplanets["st_raderr2"],
    positive_unc=False,
)

[164.45499874 251.88396508  58.49819897 ...          nan  62.0332537
  67.72792291]
[37.29783165 49.27003461 10.68038875 ...         nan         nan
         nan]
[-37.29783165 -49.27003461 -10.68038875 ...          nan          nan
          nan]


In [None]:
# TODO: calc T 
# TODO: move L and T calcs to another file
# TODO: Rename columns
# TODO: Save exoplanets df to a csv when happy with it

In [None]:
#exoplanets.to_csv( path.join(data_dir, f"exoplanets.csv") , index=False)

In [15]:
"""
# Function that calculates the stellar luminosity using arrays containing effective temp, radius, and associated uncs:
def calc_luminosity2(T, dT1, dT2, R, dR1, dR2):
    """
    #T = stellar effective temperature array [K]
    #dT1 = positive error
    #dT2 = negative error
    #R = stellar radius array [solar radius]
    #dR1 = positive error
    #dR2 = negative error
    #returns stellar luminosity [solar], positive error, negative error
    """
    #print(R)
    
    # Empty arrays to store L, dL1, dL2 [convert them to solar at end]:
    L = np.zeros(T.size)
    dL1 = np.zeros(T.size) 
    dL2 = np.zeros(T.size) 
    L_solar = np.zeros(T.size)
    dL1_solar = np.zeros(T.size) 
    dL2_solar = np.zeros(T.size) 
    
    # Condition 1; if both T and R are nan:
    c1 = np.where( ( np.isnan(T)) & ( np.isnan(R) ) )
    #print(c1)
    #print(c1[0].size)
    
    # Condition 2; both T and R are real values:
    c2 = np.where( np.isfinite(T) & np.isfinite(R) )
    #print(c2)
    #print(c2[0].size)
    
    # Condition 3; if either dT1 or dR1 are nan:
    c3 = np.where( ( np.isnan(dT1) | np.isnan(dR1) ) )
    
    # Condition 4; if both dT1 and dR1 are real values:
    c4 = np.where( (np.isfinite(dT1)) & (np.isfinite(dR1)) )
    #print("c4:", c4)
    #print("c4 size: ", c4[0].size)
    
    # Condition 5; if either dT2 or dR2 are nan:
    c5 = np.where( ( np.isnan(dT2) | np.isnan(dR2) ) )
    
    # Condition 6; if both dT2 and dR2 are real values:
    c6 = np.where( (np.isnan(dT2)==False) & (np.isnan(dR2)==False) )
    
    
    # If both T and R are nan, then L is nan too
    if(c1[0].size>0):
        L[c1] = np.nan
        dL1[c1] = np.nan
        dL2[c1] = np.nan
        L_solar[c1] = np.nan
        dL1_solar[c1] = np.nan
        dL2_solar[c1] = np.nan
    
    # If T and R are real numbers, convert R and calc L:
    if(c2[0].size>0):
        # Convert R to meters:
        R[c2[0]] = R[c2[0]] * R_sun.value
        #dR1[c2] = dR1[c2] * R_sun.value # Moved to within error calculations
        #dR2[c2] = dR2[c2] * R_sun.value 
        
        #L[c2] = 4 * pi * sigma * R[c2]**2 * T[c2]**4 # in W
        L[c2] = 4*pi*sigma * np.multiply(R**2, T**4) # in W
        L_solar[c2] = L[c2] / L_sun.value # in Solar Luminosities
        print(L_solar[c2])
        
        #L_solar[c2] = lum_eqn(T[c2], R[c2])
        #print(L_solar)
    
        # TODO: Would it be faster just to run the calcs and let them spit nan out?
        
        # Uncertainty calculations, where possible:
        
        # If there are no +ve errors, converted +ve errors are nan:
        if(c3[0].size>0):
            dL1[c3] = np.nan
            dL1_solar[c3] = np.nan
        
        # If both have +ve errors, calc +ve errors:
        if(c4[0].size>0):
            dR1[c4] = dR1[c4] * R_sun.value
            #dL1[c4] = 4*pi*sigma * sqrt( (2*R[c4]*T[c4]**4*dR1[c4])**2 + (R[c4]**2*4*T[c4]**3*dT1[c4])**2 ) # in W
            #dL1_solar[c4] = dL1[c4] / (L_sun.value) # in Solar Lum
            dL1[c4] = 4*pi*sigma * np.sqrt( ( 2*np.multiply(R, np.multiply(T**4, dR1)) )**2 + ( 4*np.multiply(R**2, np.multiply(T**3, dT1)) )**2 ) 
            
            #dL1_solar[c4] = lum_unc(T[c4], dT[c4], R[c4], dR[c4], positive_unc=True)
                        
        # If there are no -ve errors, converted -ve errors are nan:
        if(c5[0].size>0):
            dL2[c5] = np.nan
            dL2_solar[c5] = np.nan
            
        # If both have -ve errors, calc -ve errors:
        if(c6[0].size>0):
            dR2[c6] = dR2[c6] * R_sun.value
            #dL2[c6] = (-1) * 4*pi*sigma * sqrt( (2*R[c6]*T[c6]**4*dR2[c6])**2 + (R[c6]**2*4*T[c6]**3*dT2[c6])**2 ) # in W
            #dL2_solar[c6] = dL2 / (L_sun.value) # in Solar Lum
            dL2[c4] = 4*pi*sigma * np.sqrt( ( 2*np.multiply(R, np.multiply(T**4, dR2)) )**2 + ( 4*np.multiply(R**2, np.multiply(T**3, dT2)) )**2 ) 
                        
            #dL2_solar[c6] = lum_unc(T[c6], dT[c6], R[c6], dR[c6], positive_unc=False)
    
    # TODO: I could write a function for calculating dL1&2
    # TODO: I could use astropy tables and units
    return L_solar, dL1_solar, dL2_solar

# TODO: would it be bad to select the best parameters in the df (when merged with gaia, CKS etc) and then choose which to use to calc L?
"""