In [6]:
import numpy as np
import pandas as pd
from astropy.constants import sigma_sb, L_sun, R_sun
from math import pi, sqrt

#from preprocessing.calc_stellar_params import calc_st_lum, calc_st_teff
from preprocessing.calc_stellar_params import calc_st_lum
import preprocessing.calc_sephi

In [2]:
# Exoplanet directory:
exoplanets_dir = "~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/NASA_EA_2022_02_09.csv"
#exoplanets_dir = "~/OneDrive/SEPHI_data/NASA_EA_2022_02_09.csv"

# The length of the header in the exoplanets csv file:
header_length = 116

In [3]:
# Read data
exoplanets = pd.read_csv(exoplanets_dir, skiprows=header_length, 
                         usecols=["pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod", "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens", "pl_orbeccen", "pl_eqt", "pl_orbincl", "st_teff", "st_tefferr1", "st_tefferr2", "st_rad", "st_raderr1", "st_raderr2", "st_mass", "st_met", "st_lum", "st_lumerr1", "st_lumerr2", "st_logg", "st_age", "st_ageerr1", "st_ageerr2", "sy_dist", "sy_plx", "sy_gaiamag"])
#exculuded "st_spectype" due to csv formatting

In [4]:
print(exoplanets.head())

      pl_name  hostname                       gaia_id  sy_snum  sy_pnum  \
0    11 Com b    11 Com  Gaia DR2 3946945413106333696        2        1   
1    11 UMi b    11 UMi  Gaia DR2 1696798367260229376        1        1   
2    14 And b    14 And  Gaia DR2 1920113512486282240        1        1   
3    14 Her b    14 Her  Gaia DR2 1385293808145621504        1        2   
4  16 Cyg B b  16 Cyg B  Gaia DR2 2135550755683407232        3        1   

   discoverymethod   pl_orbper  pl_orbsmax  pl_rade  pl_bmasse  ...  st_lum  \
0  Radial Velocity   326.03000        1.29      NaN  6165.6000  ...   2.243   
1  Radial Velocity   516.21997        1.53      NaN  4684.8142  ...     NaN   
2  Radial Velocity   185.84000        0.83      NaN  1525.5000  ...   1.763   
3  Radial Velocity  1773.40002        2.93      NaN  1481.0878  ...     NaN   
4  Radial Velocity   798.50000        1.66      NaN   565.7374  ...     NaN   

   st_lumerr1  st_lumerr2  st_logg  st_age  st_ageerr1  st_ageerr2   sy_di

In [4]:
# Get rid of exoplanets whose star's haven't been observed by Gaia
exoplanets.dropna(subset=["gaia_id"], inplace=True)

In [31]:
# Not many of the exoplanets have the stellar age listed. Working out how many have stellar age:
#count_NaN = exoplanets["st_age"].isna().sum()
#count_value = exoplanets["st_age"].notna().sum()

print("Number of exoplanets with no stellar age listed: ", exoplanets["st_age"].isna().sum())
print("Number of exoplanets with stellar age listed: ", exoplanets["st_age"].notna().sum())

Number of exoplanets with no stellar age listed:  2450
Number of exoplanets with stellar age listed:  2284


In [5]:
# A few don't have stellar effective temperature listed (st_teff):
print("Number of exoplanets with no teff listed: ", exoplanets["st_teff"].isna().sum())
print("Number of exoplanets with teff listed: ", exoplanets["st_teff"].notna().sum())

Number of exoplanets with no teff listed:  227
Number of exoplanets with teff listed:  4507


In [10]:
# Some don't have stellar radius listed
print("Number of exoplanets with no st_rad listed: ", exoplanets["st_rad"].isna().sum())
print("Number of exoplanets with st_rad listed: ", exoplanets["st_rad"].notna().sum())

print("Number of exoplanets with st_raderr1 listed: ", exoplanets["st_raderr1"].notna().sum())
print("Number of exoplanets with st_raderr2 listed: ", exoplanets["st_raderr2"].notna().sum())

Number of exoplanets with no st_rad listed:  456
Number of exoplanets with st_rad listed:  4458
Number of exoplanets with st_raderr1 listed:  4305
Number of exoplanets with st_raderr2 listed:  4280


In [7]:
# Some don't have stellar age listed
print("Number of exoplanets with no st_age listed: ", exoplanets["st_age"].isna().sum())
print("Number of exoplanets with st_age listed: ", exoplanets["st_age"].notna().sum())

Number of exoplanets with no st_age listed:  2450
Number of exoplanets with st_age listed:  2284


In [41]:
# Number of stars with st_rad and st_teff:
count = 0
for i in range(exoplanets.shape[0]):
    if pd.notna(exoplanets["st_teff"].iloc[i]) and pd.notna(exoplanets["st_rad"].iloc[i]):
        count+=1
    else:
        continue

print("Number of exoplanets with st_eff and st_rad listed (can calc L): ", count)

#exoplanets["st_teff"].iloc[i]
    
# Number of stars with st_rad, st_teff and st_age:
count = 0
for i in range(exoplanets.shape[0]):
    if pd.notna(exoplanets["st_teff"].iloc[i]) and pd.notna(exoplanets["st_rad"].iloc[i]) and pd.notna(exoplanets["st_age"].iloc[i]):
        count+=1
    else:
        continue

print("Number of exoplanets with st_eff, st_rad and st_age listed (can calc SEPHI): ", count)

Number of exoplanets with st_eff and st_rad listed (can calc L):  4311
Number of exoplanets with st_eff, st_rad and st_age listed (can calc SEPHI):  2148


In [None]:
# Not many have stellar luminosity listed (but this can be calculated):
print("Number of exoplanets with no L listed: ", exoplanets["st_lum"].isna().sum())
print("Number of exoplanets with L listed: ", exoplanets["st_lum"].notna().sum())

Number of exoplanets with no L listed:  3825
Number of exoplanets with L listed:  909


In [23]:
# Do more have their luminosities listed in Gaia?
print(L_sun.value)

3.828e+26


In [None]:
def calc_st_teff(R, dR1, dR2, L, dL1, dL2):
    """
    R = stellar radius [solar radius]
    dR1 = positive error
    dR2 = negative error
    L = stellar luminosity
    dL1 = positive error
    dL2 = negative error
    
    returns stellar effective temperature, positive error, negative error
    """
    T = (2*R)**(-2) * ( L / (pi*sigma) )**(0.25)
    dT1 = 0
    dT2 = 0
    return T, dT1, dT2

In [18]:
print(exoplanets.shape[0])

4734


In [7]:
# Estimate L for stars in 'exoplanets'
luminosities = np.zeros( (exoplanets.shape[0],5) )
for i in range(exoplanets.shape[0]):

    luminosities[i] = calc_st_lum(exoplanets["st_teff"].iloc[i], exoplanets["st_tefferr1"].iloc[i], exoplanets["st_tefferr2"].iloc[i], exoplanets["st_rad"].iloc[i], exoplanets["st_raderr1"].iloc[i], exoplanets["st_raderr2"].iloc[i]) 
    # TODO: If st_lum is listed as nan or if it is a real value with a larger error than the new value then overwrite the previous values
    """
    if pd.isna(exoplanets["st_lum"].iloc[i]) or ( dL1 < exoplanets["st_lumerr1"].iloc[i] and pd.notna(exoplanets["st_lum"].iloc[i]) ): 
        #TODO: overwrite the previous values in exoplanets df
        exoplanets.loc[i,"st_lum"] = L
        exoplanets.loc[i, "st_lumerr1"] = dL1
        exoplanets.loc[i, "st_lumerr2"] = dL2               
        print(L, dL1, dL2)                   
    else:                                                        
        continue  
        """
print(luminosities)

[[ 2.21604708  0.0984965   4.44469367 -0.0984965   4.44469367]
 [ 2.40120052  0.08495064  3.53784033 -0.08495064  3.53784033]
 [ 1.7671425   0.07929191  4.48701278 -0.07929191  4.48701278]
 ...
 [        nan         nan         nan         nan         nan]
 [ 1.79262456         nan         nan         nan         nan]
 [ 1.83076776         nan         nan         nan         nan]]


In [8]:
# Add the luminosities columns to the exoplanets data frame:
exoplanets[ ["calc_L", "calc_Lerr1", "calc_L%err1", "calc_Lerr2", "calc_L%err2"] ] = luminosities
#print(exoplanets)

In [10]:
# Compare the uncertainties in st_lum and calc_L where calc_L has been computed: 
# Give a classification according to which error is lower:
# 0 (equal), 1 (st_lum_combined is 'best'), 2 (calc_L_combined is 'best'), 3 (no classification, neither have both +ve and -ve errors avaiable)

# Array to store classification:
classification = np.zeros( (exoplanets.shape[0]), dtype=int )

for i in range(exoplanets.shape[0]):
    
    # If both positive and negative errors are available for both st_lum and calc_L:
    if pd.notna(exoplanets["st_lumerr1"].iloc[i]) and pd.notna(exoplanets["st_lumerr2"].iloc[i]) and pd.notna(exoplanets["calc_Lerr1"].iloc[i]) and pd.notna(exoplanets["calc_Lerr2"].iloc[i]):
        # Calculate the percentage errors in st_lum:
        percent_err1 = exoplanets["st_lumerr1"].iloc[i] / exoplanets["st_lum"].iloc[i] * 100
        percent_err2 = (-1) * exoplanets["st_lumerr2"].iloc[i] / exoplanets["st_lum"].iloc[i] * 100
        
        # The mean percentage error on st_lum: 
        st_lum_combined = ( percent_err1 + percent_err2 ) / 2
        
        # The mean percentage error on calc_L:
        calc_L_combined = ( exoplanets["calc_L%err1"].iloc[i] + exoplanets["calc_L%err1"].iloc[i] ) / 2
        
        # Compare the mean percentage errors and assign a classification accorsing to which is smallest:
        if st_lum_combined == calc_L_combined:
            classification[i] = 0
        elif st_lum_combined < calc_L_combined:
            classification[i] = 1
        else: #calc_L_combined < st_lum_combined:
            classification[i] = 2
            
    # If the error is available on st_lum but not on calc_L, count st_lum as the 'better' one:
    elif pd.notna(exoplanets["st_lumerr1"].iloc[i]) and pd.notna(exoplanets["st_lumerr2"].iloc[i]) and pd.isna(exoplanets["calc_Lerr1"].iloc[i]) and pd.isna(exoplanets["calc_Lerr2"].iloc[i]):
        classification[i] = 1
    
     # If the error is available on calc_L and but not on st_lum, count calc_L as the 'better' one:
    elif pd.isna(exoplanets["st_lumerr1"].iloc[i]) and pd.isna(exoplanets["st_lumerr2"].iloc[i]) and pd.notna(exoplanets["calc_Lerr1"].iloc[i]) and pd.notna(exoplanets["calc_Lerr2"].iloc[i]):
        classification[i] = 2
    
    # If neither st_lum nor calc_L have both positive and negative errors, then they get no classification (3):
    else:
        classification[i] = 3
                                                                                                             
print(classification[0:100])
print(len(classification))

[1 2 2 2 2 1 3 1 1 1 1 2 1 1 3 3 3 2 3 2 2 2 1 1 1 1 1 2 2 2 2 2 3 2 2 2 1
 1 3 2 2 2 2 3 2 2 1 2 1 2 2 2 2 2 3 2 1 1 3 2 3 3 1 1 2 3 2 2 2 2 1 2 3 3
 2 3 2 2 2 2 2 2 2 2 2 1 2 3 3 2 2 2 2 2 2 2 2 2 2 2]
4734


In [11]:
print("Number of exoplanets with errors in st_lum < calc_L: ", np.count_nonzero(classification == 1),"\nNB: there are 909 planets with st_lum listed.") 
print("Number of exoplanets with errors in calc_L < st_lum: ", np.count_nonzero(classification == 2))

Number of exoplanets with errors in st_lum < calc_L:  448 
NB: there are 909 planets with st_lum listed.
Number of exoplanets with errors in calc_L < st_lum:  3774


In [16]:
# Add the crror class column to the exoplanets data frame
exoplanets[ "err_class" ] = classification
#print(exoplanets)

NameError: name 'classification' is not defined

In [14]:
# Calculate the stellar effective temperature from st_rad and st_lum:
def calc_st_teff(R, dR1, dR2, log_L, dlog_L1, dlog_L2):
    """
    R = stellar radius [solar radius]
    dR1 = positive error
    dR2 = negative error
    log_L = stellar luminosity
    dlog_L1 = positive error
    dlog_L2 = negative error
    
    returns stellar effective temperature, positive error, negative error
    """
    
    sigma = sigma_sb.value
    
    # Calculate T if R and L /= NaN
    if np.isfinite(R) and np.isfinite(log_L):
        
        # Convert R, dR1 and dR2 to meters:
        R = R * R_sun.value
        #dR1 = dR1 * R_sun.value
        #dR2 = dR2 * R_sun.value 
    
        # Convert log_L, dlog_L1 and dlog_L2 to W:
        L = 10**log_L * L_sun.value # [W]
        #dL1 = 10**log_L * np.log(10) * dlog_L1 * L_sun.value
        #dL2 = 10**log_L * np.log(10) * dlog_L2 * L_sun.value
        
        #T = (2*R)**(-2) * ( L / (pi*sigma) )**(0.25)
        T = (4*pi*sigma)**(-0.25) * L**(0.25) * R**(-0.5)
        
        # TODO: calculate negative and positive errors in T if errors in R and dlog_L are available:
        if np.isfinite(dR1) and np.isfinite(dlog_L1):
            # Convert dR1 to W:
            dR1 = dR1 * R_sun.value
            # Convert dlog_L1 to W
            dL1 = 10**log_L * np.log(10) * dlog_L1 * L_sun.value
            
            # Calc dT1:
            dT1 = 0.5*(4*pi*sigma)**(-0.25) * sqrt( ( 0.5*L**(-0.75)*dL1 / sqrt(R) )**2 + ( L**(-0.25)*R**(-1.5)*dR1 )**2 )
            percent_err1 = dT1 / T * 100
            
        # If dR1 and dlog_L1 aren't both avaiable, we can't calc dT1
        else:
            dT1 = np.nan
            percent_err1 = np.nan
        
        if np.isfinite(dR2) and np.isfinite(dlog_L2):
            # Convert dR1 to W:
            dR2 = dR2 * R_sun.value
            # Convert dlog_L1 to W
            dL2 = 10**log_L * np.log(10) * dlog_L2 * L_sun.value
            
            # Calc dT2:
            dT2 = 0.5*(4*pi*sigma)**(-0.25) * sqrt( ( 0.5*L**(-0.75)*dL2 / sqrt(R) )**2 + ( L**(-0.25)*R**(-1.5)*dR2 )**2 )
            percent_err2 = dT2 / T * 100
            
        # If dR2 and dlog_L2 aren't both avaiable, we can't calc dT2:
        else:
            dT2 = np.nan
            percent_err2 = np.nan
        
    # If either R or L are NaN, then we can't calc T or its errors:
    else:
        T = np.nan
        dT1 = np.nan
        dT2 = np.nan
        percent_err1 = np.nan
        percent_err2 = np.nan
    
    
    return T, dT1, percent_err1, dT2, percent_err2

In [15]:
# Estimate T for stars in 'exoplanets'
temps = np.zeros( (exoplanets.shape[0], 5) )
for i in range(exoplanets.shape[0]):

    temps[i] = calc_st_teff(exoplanets["st_rad"].iloc[i], exoplanets["st_raderr1"].iloc[i], exoplanets["st_raderr2"].iloc[i], exoplanets["st_lum"].iloc[i], exoplanets["st_lumerr1"].iloc[i], exoplanets["st_lumerr2"].iloc[i]) 

print(temps)

# TODO: compare the results with the exoplanets spreadsheet

[[4.81614751e+03 1.96840213e+02 4.08708854e+00 2.35653776e+02
  4.89299332e+00]
 [           nan            nan            nan            nan
             nan]
 [4.80153654e+03            nan            nan            nan
             nan]
 ...
 [           nan            nan            nan            nan
             nan]
 [4.85657558e+03 6.17842989e+02 1.27217826e+01 1.53761830e+02
  3.16605450e+00]
 [4.80270551e+03            nan            nan            nan
             nan]]


In [None]:
# TODO: add calc_teff to exoplanets df
# TODO: compare the uncertainties in st_teff with temps
# TODO: determine which uncertainties are smallest
# TODO: count how many st_teff have smaller error and how many temp have smaller error

In [None]:
# I could then merge the exoplanets data with the phase-space densities calculated from edr3.
# I would need to use a 