In [102]:
import numpy as np
import pandas as pd
import os
from pprint import pprint 
# import format_dataset as fd
from astropy.units import earthMass, jupiterMass, earthRad, jupiterRad, solMass, solRad, AU

# import exo-planets dataset

In [112]:
# Location of the exoplanet.eu catalogue export, relative to this notebook.
exoplanet_path = '../published_output/exoplanet.eu_catalog_23Nov.csv'
print('\nLoading exoplanet dataset and solar system planets:')

# The first CSV column is used as the row index of the frame.
dataset_exo = pd.read_csv(filepath_or_buffer=exoplanet_path, index_col=0)
print(dataset_exo.shape)
# Keep the frame as the last expression so the notebook renders it.
dataset_exo


Loading exoplanet dataset and solar system planets:
(5529, 97)


Unnamed: 0_level_0,planet_status,mass,mass_error_min,mass_error_max,mass_sini,mass_sini_error_min,mass_sini_error_max,radius,radius_error_min,radius_error_max,...,star_sp_type,star_age,star_age_error_min,star_age_error_max,star_teff,star_teff_error_min,star_teff_error_max,star_detected_disc,star_magnetic_field,star_alternate_names
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11 Com b,Confirmed,16.1284,1.53491,1.53491,16.1284,1.53491,1.53491,,,,...,G8 III,,,,4742.0,100.0,100.0,,,
11 Oph b,Confirmed,21.0000,3.00000,3.00000,,,,,,,...,M9,0.011,0.002,0.002,2375.0,175.0,175.0,,,"Oph 1622-2405, Oph 11A"
11 UMi b,Confirmed,11.0873,1.10000,1.10000,11.0873,1.10000,1.10000,,,,...,K4III,1.560,0.540,0.540,4340.0,70.0,70.0,,,
14 And b,Confirmed,4.6840,0.23000,0.23000,4.6840,0.23000,0.23000,,,,...,K0III,,,,4813.0,20.0,20.0,,,
14 Her b,Confirmed,,,,,4.95000,4.95000,,,,...,K0 V,5.100,,,5311.0,87.0,87.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTFJ0220+21 b,Confirmed,14.0000,6.00000,6.00000,,,,0.54,0.07,0.07,...,WD,,,,14200.0,1000.0,1000.0,,,
ZTFJ0407-00 b,Confirmed,19.0000,3.00000,3.00000,,,,0.44,0.02,0.02,...,WD,,,,17400.0,1200.0,1200.0,,,
ZTF-J1622+47 b,Confirmed,61.0000,19.00000,19.00000,,,,0.98,0.02,0.02,...,,,,,29000.0,,,,,
ZTFJ1637+49 b,Confirmed,23.0000,8.00000,8.00000,,,,0.68,0.07,0.07,...,WD,,,,11200.0,300.0,300.0,,,


We have a dataset with 5529 exoplanets and 97 features for each one. Not all features are needed in our experiments; the author picks eight of them:
1. mass
2. semi_major_axis
3. eccentricity
4. star_metallicity
5. star_radius
6. star_teff
7. star_mass
8. radius

In [113]:
# The eight catalogue columns used in the experiments.
feature_names = [
    'mass', 'semi_major_axis', 'eccentricity', 'star_metallicity',
    'star_radius', 'star_teff', 'star_mass', 'radius',
]

# Restrict the catalogue to those columns and report its size.
dataset_exo = dataset_exo[feature_names]
print(f'the original exo dataset shape is {dataset_exo.shape}')

# Keep only planets with every selected feature present; dropna() defaults to
# dropping rows (axis=0) that contain any NaN (how='any').
dataset_exo = dataset_exo.dropna()
print(f'the usable exo dataset shape is {dataset_exo.shape}')

the original exo dataset shape is (5529, 8)
the usable exo dataset shape is (872, 8)


In [121]:
solar_path = '../published_output/solar_system_planets_catalog.csv'

# Keep the same columns as the exoplanet table (`feature_names` is defined in
# the feature-selection cell) so the two frames can be stacked later.
dataset_solar_system = pd.read_csv(solar_path, index_col=0)
dataset_solar_system = dataset_solar_system[feature_names]
print(f'the original solar dataset shape is {dataset_solar_system.shape}')
# Remove the planets with NaN values. This must filter the solar-system frame:
# filtering `dataset_exo` here (as the cell used to do) is a no-op repeat of
# the earlier cell and leaves the solar-system rows unchecked.
dataset_solar_system = dataset_solar_system.dropna(axis=0, how='any')
print(f'the usable solar dataset shape is {dataset_solar_system.shape}')
# `dataset` is the working table. It is bound to the SAME object as
# `dataset_exo` (no copy is made), so later in-place changes are visible under
# both names. The solar-system planets are appended in a later cell.
dataset = dataset_exo


the original solar dataset shape is (8, 8)
the usable solar dataset shape is (8, 8)


# Reformat the dataset
## Astronomy Background and Equations:
### Get planet temperature:
This equation calculates the equilibrium temperature of a planet, which is a theoretical temperature that the planet would be at if it were a perfect blackbody, absorbing all incident stellar radiation and then re-radiating it back into space.

The formula is derived from several principles, including the Stefan-Boltzmann law which relates the temperature of a blackbody to its emitted radiation and the concept that the absorbed radiation must balance the emitted radiation for the planet to be in thermal equilibrium.

$$ T_{eq} = T_{eff\ast} \left( f \times (1 - A) \right)^{\frac{1}{4}} \times \sqrt{\frac{1}{a/R\ast}} \times \left(1 - e^2\right)^{-\frac{1}{8}} $$

Where:
$T_{eq}$ = Equilibrium temperature of the planet

$T_{eff\ast}$ = Effective temperature of the star

$f$ = Redistribution factor of absorbed energy

$A$ = Bond albedo of the planet

$a/R\ast$ = Semi-major axis of the planet's orbit divided by the star's radius

$e$ = Eccentricity of the planet's orbit

To calculate this quantity, we use the following Python function:

In [122]:
def plant_tem(Teffst, aR, ecc, f=1/4., A=0):
    """
    Equilibrium temperature of a planet.

    Evaluates

        T_eq = Teffst * (f * (1 - A))**(1/4) * sqrt(1 / aR) * (1 - ecc**2)**(-1/8)

    Parameters
    ----------
    Teffst : float
        Effective temperature of the host star (T_eff_star), usually in Kelvin (K).

    aR : float
        Semi-major axis of the orbit divided by the stellar radius (a/R_star).
        Dimensionless; it sets how much the stellar radiation is diluted by distance.

    ecc : float
        Orbital eccentricity (e), from 0 (circular orbit) to nearly 1
        (highly elliptical orbit).

    f : float, optional
        Heat redistribution factor of the planet's atmosphere. The default of
        1/4 corresponds to uniform redistribution over the surface.

    A : float, optional
        Bond albedo: the fraction of incoming energy the planet reflects,
        from 0 (no reflection, the default) to 1 (total reflection).

    Returns
    -------
    float
        The equilibrium temperature, in the same unit as `Teffst`.
    """
    # Each factor of the formula gets its own name; the final product keeps
    # the left-to-right evaluation order of the one-line expression.
    absorbed_fraction = (f * (1 - A))**(1 / 4.)
    distance_dilution = np.sqrt(1 / aR)
    eccentricity_term = (1 - ecc**2)**(1/8.)
    return Teffst * absorbed_fraction * distance_dilution / eccentricity_term

def add_temp_eq_dataset(dataset):
    """Insert the planets' equilibrium temperature as column 'temp_eq'.

    The semi-major axis is converted from AU to solar radii and divided by
    ``star_radius`` to obtain a/R_star, which is passed to ``plant_tem``
    together with ``star_teff`` and ``eccentricity``. ``plant_tem`` is called
    with its default redistribution factor and albedo.

    The new column is inserted in place at position 2 and the same frame is
    returned. Calling this twice on the same frame fails, because pandas
    refuses to insert a column name that already exists.
    """
    # Conversion factor: how many solar radii make up one astronomical unit.
    au_in_solar_radii = AU.to('solRad')
    orbit_in_solar_radii = dataset.semi_major_axis * au_in_solar_radii

    temperatures = []
    rows = zip(dataset.star_teff, orbit_in_solar_radii,
               dataset.star_radius, dataset.eccentricity)
    for star_temp, orbit, star_rad, eccentricity in rows:
        temperatures.append(plant_tem(star_temp, orbit / star_rad, eccentricity))

    dataset.insert(2, 'temp_eq', temperatures)
    return dataset

### Get Star Luminosity
$$\dfrac{\text{Star Luminosity}}{\text{Sun Luminosity}} =  (\dfrac{\text{Star Radius}}{\text{Sun Radius}})^{2}\times (\dfrac{\text{Star Teff}}{\text{Sun Teff}})^{4}$$

In [123]:
def add_star_luminosity_dataset(dataset):
    """Insert the stellar luminosity, in solar units, as 'star_luminosity'.

    L_star / L_sun = (R_star / R_sun)**2 * (Teff_star / Teff_sun)**4

    The star radius is already expressed in Sun radii in the dataset, so only
    the effective temperature has to be normalised by the solar value.
    Reference values (not used in the computation):
        lum_sun    = 3.828 * 10**26   # Watt
        radius_sun = 6.95508 * 10**8  # meters

    The new column is inserted in place at position 2 and the same frame is
    returned.
    """
    Teff_sun = 5777.0  # Kelvin

    luminosities = []
    for radius, teff in zip(dataset.star_radius, dataset.star_teff):
        luminosities.append(radius**2 * (teff / Teff_sun)**4)

    dataset.insert(2, 'star_luminosity', luminosities)
    return dataset

In [124]:
# Add the derived columns: the equilibrium temperature first, then the stellar
# luminosity. Both helpers insert their column in place and return the frame.
dataset = add_star_luminosity_dataset(add_temp_eq_dataset(dataset))
dataset.shape

(872, 10)

## Convert radius and mass from Jupiter units to Earth units
From **ChatGPT**: The conversion from Jupiter to Earth units is common in exoplanetary science because the first exoplanets discovered were mostly gas giants similar to Jupiter in mass and size. Therefore, many of the early measurements and discoveries were naturally compared to Jupiter's characteristics. As such, Jupiter became a reference point for large exoplanets, especially gas giants.

However, as the search for exoplanets has expanded and technology has improved, astronomers have found a plethora of smaller planets, some of which are more Earth-like in their mass and size. Since Earth is our home planet and the only one we know that harbors life, there is particular interest in finding and studying Earth-sized exoplanets. This interest is heightened by questions about the habitability of other planets and the search for life beyond our Solar System.

For these reasons, scientists often convert measurements into Earth units when:

- Studying smaller exoplanets: To analyze planets that are smaller, like "super-Earths" or terrestrial planets, it makes more sense to use Earth as the reference standard because their characteristics are more similar to our own planet.
- Comparing within a wider range: When assessing a broad range of exoplanets, from gas giants to terrestrial planets, it's useful to have a common unit of comparison. Since Earth units are on the smaller end of the scale, they can provide a more granular comparison for size and mass.
- Relating to potential habitability: For many studies, particularly those concerned with habitability, Earth units are the most relevant because they directly relate to conditions known to support life.

In short, while Jupiter units are still used for very large exoplanets, Earth units provide a more applicable scale for the wide range of exoplanets we now observe, especially when focusing on smaller, potentially habitable worlds.

In [125]:
def jupiter_to_earth(dataset, feature):
    """Convert one column of `dataset` from Jupiter units to Earth units, in place.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table holding the column to convert. It is modified in place.
    feature : str
        Either 'radius' (Jupiter radii -> Earth radii) or 'mass'
        (Jupiter masses -> Earth masses).

    Returns
    -------
    pandas.DataFrame
        The same `dataset` object, returned so the call can be reassigned.

    Raises
    ------
    AssertionError
        If `feature` is neither 'radius' nor 'mass'.

    Notes
    -----
    The conversion is not idempotent: calling it twice on the same column
    applies the factor twice.
    """
    assert feature in ['radius', 'mass'], f"unsupported feature: {feature!r}"
    # Unit.to() with no value returns the plain scale factor for one unit.
    if feature == 'radius':
        factor = jupiterRad.to('earthRad')
    else:
        factor = jupiterMass.to('earthMass')
    # Scale the whole column at once instead of building one astropy Quantity
    # per row through .apply().
    converted = dataset[feature] * factor
    # DataFrame.update writes the new values into `dataset` itself.
    dataset.update(pd.DataFrame({feature: converted}))
    return dataset

In [126]:
# Convert planet mass, then planet radius, from Jupiter units to Earth units.
# Not idempotent: re-running this cell scales both columns a second time.
dataset = jupiter_to_earth(jupiter_to_earth(dataset, 'mass'), 'radius')

### Data correction: Kepler-10 c: **Don't know the reason**

In [127]:
# `dataset_exo` is the same object as `dataset`, so its mass column has
# already been converted to Earth masses by the time this cell runs.
# NOTE(review): the value printed before the fix (~5466.65) is 17.2 times the
# Jupiter-to-Earth mass factor, so presumably the catalogue stores 17.2 Earth
# masses in its Jupiter-mass column for this planet; confirm against the source.
print(dataset_exo.loc['Kepler-10 c', 'mass'])
# Assign through a single .loc[row, column] call. The chained form
# `.loc[row].mass = ...` writes to an intermediate Series and is not
# guaranteed to reach the frame (it never does under pandas Copy-on-Write).
dataset_exo.loc['Kepler-10 c', 'mass'] = 17.2
print(dataset_exo.loc['Kepler-10 c', 'mass'], '\n')

5466.6485934284065
17.2 



In [129]:
# Stack the Solar System planets below the exoplanets (pd.concat stacks
# row-wise by default).
# NOTE(review): in the cells visible here `dataset` has the extra columns
# 'temp_eq' and 'star_luminosity' and Earth-unit mass/radius, while
# `dataset_solar_system` only has the eight selected features, so its rows
# get NaN in the extra columns; confirm its units match.
# Not idempotent: re-running this cell appends the Solar System rows again.
dataset = pd.concat(objs=[dataset, dataset_solar_system])

# 