In [1]:
import numpy as np
import pandas as pd
from math import pi, sqrt

import astropy.table #import tables
from astropy import units as u
from astropy import constants as const

from preprocessing.calc_stellar_params import calc_luminosity, calc_temp
from preprocessing.analyse_errs import classify_err
#from preprocessing.calc_sephi import get_sephi_RM17


In [2]:
# Number of lines at the top of the exoplanets csv file that are skipped
# when it is read (passed to pd.read_csv as skiprows):
header_length = 116

# Location of the exoplanets csv file:
exoplanets_dir = "~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/NASA_EA_2022_02_09.csv"
# Alternative location, kept for reference:
#exoplanets_dir = "~/OneDrive/SEPHI_data/NASA_EA_2022_02_09.csv"

In [3]:
# Read the exoplanets csv file, skipping the header block at the top of the
# file and loading only the planet, host-star and system columns listed below.
#   pl_orbper  = orbital period [days]
#   pl_orbsmax = orbit semi-major axis [au]
# "st_spectype" is excluded due to csv formatting.
exoplanets = pd.read_csv(
    exoplanets_dir,
    skiprows=header_length,
    usecols=[
        # planet / system identification
        "pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod",
        # planet parameters
        "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens",
        "pl_orbeccen", "pl_eqt", "pl_orbincl",
        # stellar parameters and their upper/lower errors
        "st_teff", "st_tefferr1", "st_tefferr2",
        "st_rad", "st_raderr1", "st_raderr2",
        "st_mass", "st_met",
        "st_lum", "st_lumerr1", "st_lumerr2",
        "st_logg",
        "st_age", "st_ageerr1", "st_ageerr2",
        # system distance, parallax and Gaia magnitude
        "sy_dist", "sy_plx", "sy_gaiamag",
    ],
)

In [4]:
#print(exoplanets.head())

In [4]:
# Get rid of exoplanets whose stars haven't been observed by Gaia (no gaia_id listed).
# The filtered frame is rebound to the same name instead of using inplace=True,
# which pandas discourages; the original row labels are kept (the index is not reset).
exoplanets = exoplanets.dropna(subset=["gaia_id"])

In [31]:
# Not many of the exoplanets have the stellar age listed. Working out how many have stellar age:
# (mask is True where st_age holds a value, False where it is NaN)
age_listed = exoplanets["st_age"].notna()

print("Number of exoplanets with no stellar age listed: ", (~age_listed).sum())
print("Number of exoplanets with stellar age listed: ", age_listed.sum())

Number of exoplanets with no stellar age listed:  2450
Number of exoplanets with stellar age listed:  2284


In [5]:
# A few don't have stellar effective temperature listed (st_teff):
# (mask is True where st_teff is NaN)
teff_missing = exoplanets["st_teff"].isna()
print("Number of exoplanets with no teff listed: ", teff_missing.sum())
print("Number of exoplanets with teff listed: ", (~teff_missing).sum())

Number of exoplanets with no teff listed:  227
Number of exoplanets with teff listed:  4507


In [10]:
# Some don't have stellar radius listed.
# Series.count() gives the number of non-NaN entries, so the number of
# missing radii is the total row count minus that.
n_rad_listed = exoplanets["st_rad"].count()
print("Number of exoplanets with no st_rad listed: ", len(exoplanets) - n_rad_listed)
print("Number of exoplanets with st_rad listed: ", n_rad_listed)

# How many of the radii come with an upper (err1) / lower (err2) error:
print("Number of exoplanets with st_raderr1 listed: ", exoplanets["st_raderr1"].count())
print("Number of exoplanets with st_raderr2 listed: ", exoplanets["st_raderr2"].count())

Number of exoplanets with no st_rad listed:  456
Number of exoplanets with st_rad listed:  4458
Number of exoplanets with st_raderr1 listed:  4305
Number of exoplanets with st_raderr2 listed:  4280


In [7]:
# Some don't have stellar age listed.
# Series.count() is the number of non-NaN ages; the remainder of the rows have no age.
n_age_listed = exoplanets["st_age"].count()
print("Number of exoplanets with no st_age listed: ", len(exoplanets) - n_age_listed)
print("Number of exoplanets with st_age listed: ", n_age_listed)

Number of exoplanets with no st_age listed:  2450
Number of exoplanets with st_age listed:  2284


In [41]:
# Count how many exoplanets have the stellar parameters needed downstream.
# Boolean masks (True where a value is listed) replace the row-by-row Python
# loops: combining them with & and summing gives the same counts in one pass.
has_teff = exoplanets["st_teff"].notna()
has_rad = exoplanets["st_rad"].notna()
has_age = exoplanets["st_age"].notna()

# Number of stars with st_rad and st_teff:
count = int((has_teff & has_rad).sum())

# The label previously read "st_eff"; the column is st_teff.
print("Number of exoplanets with st_teff and st_rad listed (can calc L): ", count)

# Number of stars with st_rad, st_teff and st_age:
count = int((has_teff & has_rad & has_age).sum())

print("Number of exoplanets with st_teff, st_rad and st_age listed (can calc SEPHI): ", count)

Number of exoplanets with st_eff and st_rad listed (can calc L):  4311
Number of exoplanets with st_eff, st_rad and st_age listed (can calc SEPHI):  2148


In [None]:
# Not many have stellar luminosity listed (but this can be calculated):
# (mask is True where st_lum holds a value)
lum_listed = exoplanets["st_lum"].notna()
print("Number of exoplanets with no L listed: ", (~lum_listed).sum())
print("Number of exoplanets with L listed: ", lum_listed.sum())

# Open question: do more have their luminosities listed in Gaia?

Number of exoplanets with no L listed:  3825
Number of exoplanets with L listed:  909


In [18]:
# Number of rows (exoplanets) currently in the data frame:
print(len(exoplanets))

4734


In [5]:
# Estimate L for stars in 'exoplanets'.
# calc_luminosity is called once per row with the effective temperature and
# radius (each followed by its two listed errors); the five values it returns
# are stored in the matching row of `luminosities`.
luminosities = np.zeros( (len(exoplanets), 5) )
lum_inputs = [
    exoplanets[name].to_numpy()
    for name in ("st_teff", "st_tefferr1", "st_tefferr2", "st_rad", "st_raderr1", "st_raderr2")
]
for i, star_params in enumerate(zip(*lum_inputs)):
    luminosities[i] = calc_luminosity(*star_params)
    # TODO: If st_lum is listed as nan or if it is a real value with a larger error than the new value then overwrite the previous values
    # Disabled draft of that overwrite (never executed; L, dL1 and dL2 are not defined here):
    #
    # if pd.isna(exoplanets["st_lum"].iloc[i]) or ( dL1 < exoplanets["st_lumerr1"].iloc[i] and pd.notna(exoplanets["st_lum"].iloc[i]) ):
    #     #TODO: overwrite the previous values in exoplanets df
    #     exoplanets.loc[i,"st_lum"] = L
    #     exoplanets.loc[i, "st_lumerr1"] = dL1
    #     exoplanets.loc[i, "st_lumerr2"] = dL2
    #     print(L, dL1, dL2)
    # else:
    #     continue

In [6]:
# Add the luminosities columns to the exoplanets data frame.
# Each column of the (n_rows, 5) luminosities array becomes one named column,
# assigned in the same left-to-right order as the array.
lum_column_names = ["calc_L", "calc_Lerr1", "calc_L%err1", "calc_Lerr2", "calc_L%err2"]
for k, lum_column in enumerate(lum_column_names):
    exoplanets[lum_column] = luminosities[:, k]

In [7]:
# Classify the luminosity uncertainties using classify_err.
# For every exoplanet the listed luminosity (st_lum with its two errors) and the
# two percentage errors of the calculated luminosity are passed in, and the
# returned value is stored as an integer class.
lum_err_inputs = [
    exoplanets[name].to_numpy()
    for name in ("st_lum", "st_lumerr1", "st_lumerr2", "calc_L%err1", "calc_L%err2")
]

# Array to store classification (one integer per exoplanet):
classes_L = np.zeros( len(exoplanets), dtype=int )
for i, err_values in enumerate(zip(*lum_err_inputs)):
    classes_L[i] = classify_err(*err_values)

# Show the first 100 classes and check there is one class per exoplanet:
print(classes_L[0:100])
print(len(classes_L))

[1 2 2 2 2 1 3 1 1 1 1 1 1 1 3 3 3 1 3 2 2 2 1 1 1 1 1 2 2 2 2 2 3 1 1 1 1
 1 3 2 2 2 2 3 2 2 1 2 1 2 2 2 2 2 3 2 1 1 3 2 3 3 1 1 2 3 2 1 1 2 1 1 3 3
 1 3 2 2 2 2 2 2 2 2 2 1 2 3 3 2 2 2 2 2 2 2 2 2 2 2]
4734


In [8]:
# Count how many exoplanets fall into luminosity error class 1 and class 2.
# The number of planets with st_lum listed is taken from the data instead of
# being hard-coded (it was written as 909), so the note stays correct if the
# input catalogue or the filtering changes.
n_st_lum_listed = exoplanets["st_lum"].notna().sum()
print("Number of exoplanets with errors in st_lum < calc_L or no dcalc_L: ", np.count_nonzero(classes_L == 1), f"\nNB: there are {n_st_lum_listed} planets with st_lum listed.")
print("Number of exoplanets with errors in calc_L < st_lum or no dst_lum: ", np.count_nonzero(classes_L == 2))

Number of exoplanets with errors in st_lum < calc_L or no dcalc_L:  646 
NB: there are 909 planets with st_lum listed.
Number of exoplanets with errors in calc_L < st_lum or no dst_lum:  3574


In [8]:
# Add the error class column to the exoplanets data frame
# (assign returns a frame with the new column, which is rebound to the same name).
exoplanets = exoplanets.assign(Lerr_class=classes_L)

In [9]:
# Estimate T for stars in 'exoplanets'.
# calc_temp is called once per row with the stellar radius and the listed
# luminosity (each followed by its two listed errors); the five values it
# returns are stored in the matching row of `temps`.
temps = np.zeros( (len(exoplanets), 5) )
temp_inputs = [
    exoplanets[name].to_numpy()
    for name in ("st_rad", "st_raderr1", "st_raderr2", "st_lum", "st_lumerr1", "st_lumerr2")
]
for i, star_params in enumerate(zip(*temp_inputs)):
    temps[i] = calc_temp(*star_params)

In [10]:
# Add calc_teff to exoplanets df.
# Each column of the (n_rows, 5) temps array becomes one named column,
# assigned in the same left-to-right order as the array.
temp_column_names = ["calc_T", "calc_Terr1", "calc_T%err1", "calc_Terr2", "calc_T%err2"]
for k, temp_column in enumerate(temp_column_names):
    exoplanets[temp_column] = temps[:, k]
# Author's note: temps and errors look good!

In [11]:
# Compare the uncertainties in st_teff and calc_T and give each exoplanet a
# classification according to which error is lower.
# Class meanings as noted by the author (the note originally named the
# luminosity columns; classify_err lives in preprocessing.analyse_errs, so
# confirm there):
#   0 (equal), 1 (st_teff is 'best'), 2 (calc_T is 'best'),
#   3 (no classification, neither have both +ve and -ve errors available)
teff_err_inputs = [
    exoplanets[name].to_numpy()
    for name in ("st_teff", "st_tefferr1", "st_tefferr2", "calc_T%err1", "calc_T%err2")
]

# Array to store classification (one integer per exoplanet):
classes_T = np.zeros( len(exoplanets), dtype=int )
for i, err_values in enumerate(zip(*teff_err_inputs)):
    classes_T[i] = classify_err(*err_values)

# Show the first 100 classes and check there is one class per exoplanet:
print(classes_T[0:100])
print(len(classes_T))

[1 1 1 1 1 1 1 1 1 2 2 2 1 1 3 3 3 1 1 1 1 1 2 2 2 3 2 1 1 1 1 1 3 2 2 2 2
 2 3 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 3 3
 2 3 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1]
4734


In [14]:
# Counting how many st_teff have smaller error and how many calc_T have smaller error.
# The number of planets with st_teff listed is taken from the data instead of
# being hard-coded (it was written as 4507), so the note stays correct if the
# input catalogue or the filtering changes.
n_st_teff_listed = exoplanets["st_teff"].notna().sum()
print("Number of exoplanets with errors in st_teff < calc_teff or incomplete errors on calc_teff: ", np.count_nonzero(classes_T == 1), f"\nNB: there are {n_st_teff_listed} planets with st_teff listed.")
print("Number of exoplanets with errors in calc_T < st_teff or incomplete errors on st_teff: ", np.count_nonzero(classes_T == 2))

Number of exoplanets with errors in st_teff < calc_teff or incomplete errors on calc_teff:  4103 
NB: there are 4507 planets with st_teff listed.
Number of exoplanets with errors in calc_T < st_teff or incomplete errors on st_teff:  272


In [12]:
# Adding the classes_T array to the exoplanets df
# (assign returns a frame with the new column, which is rebound to the same name),
# then printing the resulting data frame.
exoplanets = exoplanets.assign(Terr_class=classes_T)
print(exoplanets)

         pl_name  hostname                       gaia_id  sy_snum  sy_pnum  \
0       11 Com b    11 Com  Gaia DR2 3946945413106333696        2        1   
1       11 UMi b    11 UMi  Gaia DR2 1696798367260229376        1        1   
2       14 And b    14 And  Gaia DR2 1920113512486282240        1        1   
3       14 Her b    14 Her  Gaia DR2 1385293808145621504        1        2   
4     16 Cyg B b  16 Cyg B  Gaia DR2 2135550755683407232        3        1   
...          ...       ...                           ...      ...      ...   
4909   ups And b   ups And   Gaia DR2 348020448377061376        2        3   
4910   ups And c   ups And   Gaia DR2 348020448377061376        2        3   
4911   ups And d   ups And   Gaia DR2 348020448377061376        2        3   
4912   ups Leo b   ups Leo  Gaia DR2 3794167001116433152        1        1   
4913    xi Aql b    xi Aql  Gaia DR2 4298361114750843904        1        1   

      discoverymethod    pl_orbper  pl_orbsmax  pl_rade   pl_bm

In [14]:
#exoplanets_tab = astropy.table.Table.from_pandas(exoplanets)
#exoplanets["pl_rade"].unit = astropy.units.earthRad
#print(exoplanets_tab)

In [None]:
# TODO: I can drop the two percentage errors on L and T

In [15]:
# TODO: run an exoplanet through the sephi code
# Convert the inputs for one exoplanet by attaching astropy units.
# The exoplanet is selected by row position (not by index label).
planet_row = 13

pl_mass = exoplanets["pl_bmasse"].iloc[planet_row] * u.earthMass
pl_rad = exoplanets["pl_rade"].iloc[planet_row] * u.earthRad
pl_a = exoplanets["pl_orbsmax"].iloc[planet_row] * u.AU
teff = exoplanets["st_teff"].iloc[planet_row] * u.K
# st_lum is given a logarithmic unit, dex(L_sun) -- presumably the catalogue
# lists log10 of the luminosity in solar units; TODO confirm.
lum = exoplanets["st_lum"].iloc[planet_row] * u.dex(u.L_sun)
# st_age is scaled by 10**9 before attaching yr -- presumably the catalogue
# lists ages in Gyr; TODO confirm.
age = exoplanets["st_age"].iloc[planet_row] * 10**9 * u.yr
# Fixed: the last argument was written `pl_ a` (stray space), a SyntaxError.
print("planet mass, radius and semi-major axis: ", pl_mass, pl_rad, pl_a)

planet mass, radius and semi-major axis:  4417.837 earthMass 16.141 earthRad 156.0 AU


In [None]:
# I could then merge the exoplanets data with the phase-space densities calculated from edr3.
# I would need to use a 