In [1]:
import numpy as np
import pandas as pd
from astropy.constants import sigma_sb, L_sun, R_sun
from math import pi, sqrt

import preprocessing.calc_sephi

In [2]:
# Exoplanet directory:
exoplanets_dir = "~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/NASA_EA_2022_02_09.csv"
#exoplanets_dir = "~/OneDrive/SEPHI_data/NASA_EA_2022_02_09.csv"

# The length of the header in the exoplanets csv file:
header_length = 116

In [3]:
# Read data
exoplanets = pd.read_csv(exoplanets_dir, skiprows=header_length, 
                         usecols=["pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod", "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens", "pl_orbeccen", "pl_eqt", "pl_orbincl", "st_teff", "st_tefferr1", "st_tefferr2", "st_rad", "st_raderr1", "st_raderr2", "st_mass", "st_met", "st_lum", "st_lumerr1", "st_lumerr2", "st_logg", "st_age", "st_ageerr1", "st_ageerr2", "sy_dist", "sy_plx", "sy_gaiamag"])
#exculuded "st_spectype" due to csv formatting

In [5]:
print(exoplanets.head())

      pl_name  hostname                       gaia_id  sy_snum  sy_pnum  \
0    11 Com b    11 Com  Gaia DR2 3946945413106333696        2        1   
1    11 UMi b    11 UMi  Gaia DR2 1696798367260229376        1        1   
2    14 And b    14 And  Gaia DR2 1920113512486282240        1        1   
3    14 Her b    14 Her  Gaia DR2 1385293808145621504        1        2   
4  16 Cyg B b  16 Cyg B  Gaia DR2 2135550755683407232        3        1   

   discoverymethod   pl_orbper  pl_orbsmax  pl_rade  pl_bmasse  ...  st_lum  \
0  Radial Velocity   326.03000        1.29      NaN  6165.6000  ...   2.243   
1  Radial Velocity   516.21997        1.53      NaN  4684.8142  ...     NaN   
2  Radial Velocity   185.84000        0.83      NaN  1525.5000  ...   1.763   
3  Radial Velocity  1773.40002        2.93      NaN  1481.0878  ...     NaN   
4  Radial Velocity   798.50000        1.66      NaN   565.7374  ...     NaN   

   st_lumerr1  st_lumerr2  st_logg  st_age  st_ageerr1  st_ageerr2   sy_di

In [4]:
# Get rid of exoplanets whose star's haven't been observed by Gaia
exoplanets.dropna(subset=["gaia_id"], inplace=True)

In [31]:
# Not many of the exoplanets have the stellar age listed. Working out how many have stellar age:
#count_NaN = exoplanets["st_age"].isna().sum()
#count_value = exoplanets["st_age"].notna().sum()

print("Number of exoplanets with no stellar age listed: ", exoplanets["st_age"].isna().sum())
print("Number of exoplanets with stellar age listed: ", exoplanets["st_age"].notna().sum())

Number of exoplanets with no stellar age listed:  2450
Number of exoplanets with stellar age listed:  2284


In [5]:
# A few don't have stellar effective temperature listed (st_teff):
print("Number of exoplanets with no teff listed: ", exoplanets["st_teff"].isna().sum())
print("Number of exoplanets with teff listed: ", exoplanets["st_teff"].notna().sum())

Number of exoplanets with no teff listed:  227
Number of exoplanets with teff listed:  4507


In [10]:
# Some don't have stellar radius listed
print("Number of exoplanets with no st_rad listed: ", exoplanets["st_rad"].isna().sum())
print("Number of exoplanets with st_rad listed: ", exoplanets["st_rad"].notna().sum())

print("Number of exoplanets with st_raderr1 listed: ", exoplanets["st_raderr1"].notna().sum())
print("Number of exoplanets with st_raderr2 listed: ", exoplanets["st_raderr2"].notna().sum())

Number of exoplanets with no st_rad listed:  456
Number of exoplanets with st_rad listed:  4458
Number of exoplanets with st_raderr1 listed:  4305
Number of exoplanets with st_raderr2 listed:  4280


In [7]:
# Some don't have stellar age listed
print("Number of exoplanets with no st_age listed: ", exoplanets["st_age"].isna().sum())
print("Number of exoplanets with st_age listed: ", exoplanets["st_age"].notna().sum())

Number of exoplanets with no st_age listed:  2450
Number of exoplanets with st_age listed:  2284


In [41]:
# Number of stars with st_rad and st_teff:
count = 0
for i in range(exoplanets.shape[0]):
    if pd.notna(exoplanets["st_teff"].iloc[i]) and pd.notna(exoplanets["st_rad"].iloc[i]):
        count+=1
    else:
        continue

print("Number of exoplanets with st_eff and st_rad listed (can calc L): ", count)

#exoplanets["st_teff"].iloc[i]
    
# Number of stars with st_rad, st_teff and st_age:
count = 0
for i in range(exoplanets.shape[0]):
    if pd.notna(exoplanets["st_teff"].iloc[i]) and pd.notna(exoplanets["st_rad"].iloc[i]) and pd.notna(exoplanets["st_age"].iloc[i]):
        count+=1
    else:
        continue

print("Number of exoplanets with st_eff, st_rad and st_age listed (can calc SEPHI): ", count)

Number of exoplanets with st_eff and st_rad listed (can calc L):  4311
Number of exoplanets with st_eff, st_rad and st_age listed (can calc SEPHI):  2148


In [None]:
# Not many have stellar luminosity listed (but this can be calculated):
print("Number of exoplanets with no L listed: ", exoplanets["st_lum"].isna().sum())
print("Number of exoplanets with L listed: ", exoplanets["st_lum"].notna().sum())

Number of exoplanets with no L listed:  3825
Number of exoplanets with L listed:  909


In [23]:
# Do more have their luminosities listed in Gaia?
print(L_sun.value)

3.828e+26


In [9]:
# Calc. L, R, T
def calc_st_lum(T,  dT1, dT2, R, dR1, dR2):
    """
    T = stellar effective temperature [K]
    dT1 = positive error
    dT2 = negative error
    R = stellar radius [solar radius]
    dR1 = positive error
    dR2 = negative error
    
    returns stellar luminosity [W], positive error, negative error
    """
    # Convert R, dR1 and dR2 to meters:
    R = R * R_sun.value
    dR1 = dR1 * R_sun.value
    dR2 = dR2 * R_sun.value 
    
    sigma = sigma_sb.value
    
    #Calculate L if T and R =/ NaN:
    if np.isfinite(T) and np.isfinite(R):
        L = 4 * pi * sigma * R**2 * T**4 # in W
        L_solar = L / L_sun.value # in Solar Luminosities
        log_L = np.log10(L_solar) # L listed in units log(Solar) in NASA EA
        # TODO: log_L not working
        # log_L errors do not look good
        # TODO: compare outputs to those with luminosities listed, maybe calc the % difference between them
        
        # TODO: move the error calcs up here. If L is NaN the the error will be NaN too anyway. Move the elses to the main else

    else:
        #L = np.nan
        L_solar = np.nan
        log_L = np.nan
        #dlog_L1 = np.nan
        #dlog_L2 = np.nan
    
    # Calculate positive and negative errors in L if T and R have uncs.
    # Faster than calcing dlog_L for every value just to get NaN?
    if np.isfinite(dT1) and np.isfinite(dR1):
        dL1 = 4*pi*sigma_sb.value * sqrt( (2*R*T**4*dR1)**2 + (R**2*4*T**3*dT1)**2 ) # in W
        dL1_solar = dL1 / (L_sun.value) # in Solar Lum
        dlog_L1 = dL1_solar / ( L*np.log(10) ) # in log(Solar)
    else:
        dL1 = np.nan
        dL1_solar = np.nan
        dlog_L1 = np.nan
        
    if np.isfinite(dT2) and np.isfinite(dR2):
        dL2 = (-1) * 4*pi*sigma_sb.value * sqrt( (2*R*T**4*dR2)**2 + (R**2*4*T**3*dT2)**2 ) # in W
        dL2_solar = dL2 / (L_sun.value) # in Solar Lum
        dlog_L2 = dL2_solar / ( L*np.log(10) ) # in log(Solar)
    else:
        dL2 = np.nan
        dL2_solar = np.nan
        dlog_L2 = np.nan
    
    #
    return L_solar, dL1_solar, dL2_solar, log_L, dlog_L1, dlog_L2

    
    
def calc_st_rad(T, dT1, dT2, L, dL1, dL2):
    """
    T = stellar effective temperature
    dT1 = positive error
    dT2 = negative error
    L = stellar luminosity
    dL1 = positive error
    dL2 = negative error
    
    returns stellar radius, positive error, negative error
    """
    R = 0.5*T**(-2) * sqrt( L / (pi*sigma) )
    dR1 = 0
    dR2 = 0
    return R, dR1, dR2

# I don't need stellar radius for SEPHI calculations
def calc_st_teff(R, dR1, dR2, L, dL1, dL2):
    """
    R = stellar radius
    dR1 = positive error
    dR2 = negative error
    L = stellar luminosity
    dL1 = positive error
    dL2 = negative error
    
    returns stellar effective temperature, positive error, negative error
    """
    T = (2*R)**(-2) * ( L / (pi*sigma) )**(0.25)
    dT1 = 0
    dT2 = 0
    return T, dT1, dT2

In [18]:
print(exoplanets.shape[0])

4734


In [13]:
print(np.isnan(5))

False


In [43]:
#print(exoplanets.loc[:,"st_teff"])
print(pd.isna(exoplanets.loc[0,"st_teff"]))

False


In [10]:
# Estimate L for stars with T and R listed
for i in range(exoplanets.shape[0]):
    #if pd.notna(exoplanets["st_teff"].iloc[i]) and pd.notna(exoplanets["st_rad"].iloc[i]):
        L_solar, dL1_solar, dL2_solar, log_L, dlog_L1, dlog_L2 = calc_st_lum(exoplanets["st_teff"].iloc[i], exoplanets["st_tefferr1"].iloc[i], exoplanets["st_tefferr2"].iloc[i], exoplanets["st_rad"].iloc[i], exoplanets["st_raderr1"].iloc[i], exoplanets["st_raderr2"].iloc[i])
        print(L_solar, dL1_solar, dL2_solar, log_L, dlog_L1, dlog_L2)
        #print(log_L, dlog_L1, dlog_L2)
        #TODO: calc percentage error in L
        #TODO: calc percentage error in st_lum
        #TODO: compare the two
        #TODO: why are some of these repeating? - Stars with multiple planets have their L repeatedly calculated
        
        # TODO: If st_lum is listed as nan or if it is a real value with a larger error than the new value then overwrite the previous values
        """
        if pd.isna(exoplanets["st_lum"].iloc[i]) or ( dL1 < exoplanets["st_lumerr1"].iloc[i] and pd.notna(exoplanets["st_lum"].iloc[i]) ): 
            #TODO: overwrite the previous values in exoplanets df
            exoplanets.loc[i,"st_lum"] = L
            exoplanets.loc[i, "st_lumerr1"] = dL1
            exoplanets.loc[i, "st_lumerr2"] = dL2               
            print(L, dL1, dL2)
            
            
        else:                                                        
            continue  
            """
    #else:
        #continue
        

164.45499874042721 37.29783165098543 -37.29783165098543 2.2160470787453392 2.5730539235344143e-28 -2.5730539235344143e-28
251.88396507671 49.27003461491248 -49.27003461491248 2.4012005212084824 2.2191912347498625e-28 -2.2191912347498625e-28
58.49819896652726 10.680388754124674 -10.680388754124674 1.767142495296888 2.0713664978168446e-28 -2.0713664978168446e-28
0.6326650639508796 0.018044038903302834 -0.018044038903302834 -0.19882614681517047 3.2357300618330557e-29 -3.2357300618330557e-29
1.25754039366143 0.02333170641364644 -0.02333170641364644 0.09952194399636517 2.1049265266279308e-29 -2.1049265266279308e-29
180.75227439111822 11.045560364494442 -10.466256148657642 2.2570837706929745 6.932922811233776e-29 -6.56931324502933e-29
40.003688310237834 nan nan 1.6021000348014274 nan nan
nan nan nan nan nan nan
58.46255577262152 9.258725637472558 -6.523142137331439 1.7668777972288157 1.7967423311028627e-28 -1.2658767598111515e-28
14.611175688763897 0.6943127965274726 -0.6943127965274726 1.16

In [None]:
# Compare the unc in L if you calculate if vs the unc in the listed values. 
# If the unc. in the calculated value is smaller, replace the listed value

In [None]:
# Calculate other parameters
# Estimate T for stars without it listed

In [None]:
# I could then merge the exoplanets data with the phase-space densities calculated from edr3.
# I would need to use a 