In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import seaborn as sns; sns.set()

from collections import Counter

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, accuracy_score

from sklearn.naive_bayes import GaussianNB, MultinomialNB

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

In [2]:
# Allow the text repr of wide DataFrames to wrap across multiple lines.
pd.set_option('display.expand_frame_repr', True)

Data from http://exoplanet.eu/catalog/

Showing 5346 planets / 3943 planetary systems / 855 multiple planet systems 

**Goal**: implement a Naive Bayes (NB) classifier that labels each exoplanet as habitable or not habitable.

In [5]:
# Load the exoplanet catalogue from a local CSV file (presumably an export of
# the exoplanet.eu catalogue referenced in the notes -- confirm provenance).
exoplanets = pd.read_csv('exoplanet_catalog.csv')
# Bare expression: renders the frame as the cell output.
exoplanets

Unnamed: 0,name,planet_status,mass,mass_error_min,mass_error_max,mass_sini,mass_sini_error_min,mass_sini_error_max,radius,radius_error_min,...,star_sp_type,star_age,star_age_error_min,star_age_error_max,star_teff,star_teff_error_min,star_teff_error_max,star_detected_disc,star_magnetic_field,star_alternate_names
0,11 Com b,Confirmed,,,,16.1284,1.53491,1.53491,,,...,G8 III,,,,4742.0,100.0,100.0,,,
1,11 Oph b,Confirmed,21.000,3.00,3.000,,,,,,...,M9,0.011,0.002,0.002,2375.0,175.0,175.0,,,"Oph 1622-2405, Oph 11A"
2,11 UMi b,Confirmed,,,,11.0873,1.10000,1.10000,,,...,K4III,1.560,0.540,0.540,4340.0,70.0,70.0,,,
3,14 And b,Confirmed,,,,4.6840,0.23000,0.23000,,,...,K0III,,,,4813.0,20.0,20.0,,,
4,14 Her b,Confirmed,9.653,1.67,2.331,5.2150,0.30000,0.30000,,,...,K0 V,5.100,,,5311.0,87.0,87.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5341,ups And c,Confirmed,9.100,2.93,5.040,1.8000,0.26000,0.26000,,,...,F8 V,3.800,1.000,1.000,6212.0,80.0,80.0,,,
5342,ups And d,Confirmed,23.580,2.29,2.930,10.1900,,,,,...,F8 V,3.800,1.000,1.000,6212.0,80.0,80.0,,,
5343,ups And e,Confirmed,,,,1.0590,0.02800,0.02800,,,...,F8 V,3.800,1.000,1.000,6212.0,80.0,80.0,,,
5344,ups Leo b,Confirmed,,,,0.5100,0.26000,0.08000,,,...,G9 III,,,,,,,,,


In [6]:
# List every column label available in the catalogue frame.
exoplanets.columns

Index(['name', 'planet_status', 'mass', 'mass_error_min', 'mass_error_max',
       'mass_sini', 'mass_sini_error_min', 'mass_sini_error_max', 'radius',
       'radius_error_min', 'radius_error_max', 'orbital_period',
       'orbital_period_error_min', 'orbital_period_error_max',
       'semi_major_axis', 'semi_major_axis_error_min',
       'semi_major_axis_error_max', 'eccentricity', 'eccentricity_error_min',
       'eccentricity_error_max', 'inclination', 'inclination_error_min',
       'inclination_error_max', 'angular_distance', 'discovered', 'updated',
       'omega', 'omega_error_min', 'omega_error_max', 'tperi',
       'tperi_error_min', 'tperi_error_max', 'tconj', 'tconj_error_min',
       'tconj_error_max', 'tzero_tr', 'tzero_tr_error_min',
       'tzero_tr_error_max', 'tzero_tr_sec', 'tzero_tr_sec_error_min',
       'tzero_tr_sec_error_max', 'lambda_angle', 'lambda_angle_error_min',
       'lambda_angle_error_max', 'impact_parameter',
       'impact_parameter_error_min', 'impa

name,
planet_status,
mass,
mass_sini,
radius,
orbital_period,
semi_major_axis,
eccentricity,
inclination,
angular_distance,
discovered,
updated,
omega,
tperi,
tconj,
tzero_tr,
tzero_tr_sec,
lambda_angle,
impact_parameter,
tzero_vr,
k,
temp_calculated,
temp_measured,
hot_point_lon,
geometric_albedo,
log_g,
publication,
detection_type,
mass_detection_type,
radius_detection_type,
alternate_names,
molecules,
star_name,
ra,
dec,
mag_v,
mag_i,
mag_j,
mag_h,
mag_k,
star_distance,
star_metallicity,
star_mass,
star_radius,
star_sp_type,
star_age,
star_teff,
star_detected_disc,
star_magnetic_field,
star_alternate_names

## Define Labels

### Habitable Zone

This __[GitHub repository](https://github.com/ageller/HZ_Kopparapu)__ implements a function that computes the habitable zone of a stellar system hosting exoplanets, as defined by __[Kopparapu et al. 2013](https://ui.adsabs.harvard.edu/abs/2013ApJ...765..131K/abstract)__, and also provides the required coefficients.

In [7]:
def getHZ(L, T, inC, outC):
    """Return the inner and outer habitable-zone distances of a star.

    The effective stellar flux at each boundary is a 4th-order polynomial in
    ``tst = T - 5780`` (Kopparapu HZ), and the boundary distance is
    ``sqrt(L / Seff)``.

    Parameters
    ----------
    L : float or array-like
        Luminosity of the star (the original unit hint is LSun).
    T : float or array-like
        Effective temperature of the star (the original unit hint is K).
        Values outside 2600-7200 K are clamped to the nearest limit, because
        that is the range covered by the Kopparapu results; a warning is
        printed when clamping happens.
    inC, outC : indexable of 5 numbers
        The relevant inner and outer coefficient lists, from Kopparapu's
        Table 3. These coefficients are supplied in the HZ_coefficients.dat
        file. Index 0 is the constant term, index k multiplies ``tst**k``.

    Returns
    -------
    (dinRK, doutRK)
        Inner and outer habitable-zone distances (the original unit hint is
        AU). Scalars in give scalars out; arrays/Series are handled
        element-wise.
    """
    Lstar = L  # .value_in(units.LSun)
    Tstar = T  # .value_in(units.K)

    # THIS IS THE LIMIT OF THE Kopparapu results. Outside of it the temperature
    # is clamped. ``np.any`` keeps the checks valid for array input, where a
    # plain ``if Tstar > 7200`` would raise "truth value is ambiguous".
    if np.any(np.asarray(Tstar) > 7200):
        print("!!!WARNING: Teff > 7200 K -- outside of Kopparapu model limits -- setting Tstar=7200-5780")
    if np.any(np.asarray(Tstar) < 2600):
        print("!!!WARNING: Teff < 2600 K -- outside of Kopparapu model limits -- setting Tstar=2600-5780")

    # Kopparapu HZ: temperature offset from 5780 K, clamped element-wise.
    # NaN temperatures pass through the clamp unchanged and yield NaN distances.
    tst = np.clip(Tstar, 2600.0, 7200.0) - 5780.0

    def _seff(coeffs):
        # Effective flux polynomial; same term order as the original code.
        return (coeffs[0] + coeffs[1]*tst + coeffs[2]*tst**2
                + coeffs[3]*tst**3 + coeffs[4]*tst**4)

    Seff_inRK = _seff(inC)
    Seff_outRK = _seff(outC)
    dinRK = np.sqrt(Lstar / Seff_inRK)  # | units.AU
    doutRK = np.sqrt(Lstar / Seff_outRK)  # | units.AU

    return dinRK, doutRK

In [9]:
# Read the habitable-zone coefficient table. The first 13 lines of the file
# are skipped (presumably a descriptive header -- confirm against the .dat
# file); the remaining rows are space-separated and labelled S1..S5.
HZdf = pd.read_csv(
    "HZ_coefficients.dat",
    names=['S1', 'S2', 'S3', 'S4', 'S5'],
    skiprows=13,
    sep=' ',
    skipinitialspace=True,
)
HZdf

Unnamed: 0,S1,S2,S3,S4,S5
0,1.7753,1.0512,1.014,0.3438,0.3179
1,0.00014316,0.00013242,8.1774e-05,5.8942e-05,5.4513e-05
2,2.9875e-09,1.5418e-08,1.7063e-09,1.6558e-09,1.5313e-09
3,-7.5702e-12,-7.9895e-12,-4.3241e-12,-3.0045e-12,-2.7786e-12
4,-1.1635e-15,-1.8328e-15,-6.6462e-16,-5.2983e-16,-4.8997e-16


### Semi-Minor Axis

\begin{align}
    e = \sqrt{1 - \frac{b^2}{a^2}}
    \quad\Longrightarrow\quad
    b = a\sqrt{1 - e^2}
\end{align}

In [10]:
def semi_minor_axis(a, e):
    '''
    Semi-minor axis b of an ellipse, computed as b = a * sqrt(1 - e**2).

    a: semi-major axis
    e: eccentricity

    Works element-wise on NumPy arrays / pandas Series as well as on scalars.
    An eccentricity above 1 makes the term under the square root negative.
    '''
    radicand = 1 - e**2
    axis_ratio = np.sqrt(radicand)
    return a * axis_ratio

In [14]:
# Summary statistics of the eccentricity column. Worth checking the maximum:
# the recorded output shows a value above 1, for which sqrt(1 - e**2) in
# semi_minor_axis is not a real number.
exoplanets['eccentricity'].describe()

count    2163.000000
mean        0.165130
std         0.199494
min         0.000000
25%         0.019000
50%         0.100000
75%         0.244750
max         3.155000
Name: eccentricity, dtype: float64

In [19]:
# (number of rows, number of columns) of the catalogue frame.
exoplanets.shape

(5346, 99)

In [18]:
# Displays the catalogue with every row whose eccentricity is <= 1 removed.
# A missing eccentricity compares False against `<= 1`, so those rows are kept
# alongside the e > 1 rows.
# NOTE(review): the returned frame is not assigned, so `exoplanets` itself is
# unchanged by this cell. If the goal is to discard the e > 1 rows before
# computing the semi-minor axis, the condition needs inverting and the result
# needs storing -- confirm intent.
exoplanets.drop(exoplanets.loc[exoplanets['eccentricity'] <= 1].index)

Unnamed: 0,name,planet_status,mass,mass_error_min,mass_error_max,mass_sini,mass_sini_error_min,mass_sini_error_max,radius,radius_error_min,...,star_age,star_age_error_min,star_age_error_max,star_teff,star_teff_error_min,star_teff_error_max,star_detected_disc,star_magnetic_field,star_alternate_names,semi_minor_axis
1,11 Oph b,Confirmed,21.0,3.0,3.0,,,,,,...,0.0110,0.002,0.002,2375.0,175.0,175.0,,,"Oph 1622-2405, Oph 11A",
9,1RXS 1609 b,Confirmed,14.0,3.0,2.0,,,,1.70,,...,0.0110,0.002,0.002,4060.0,200.0,200.0,,,"1RXS1609, 1RXS J1609, 1RXS J160929",
10,1RXS J235133.3+312720 b,Confirmed,32.0,6.0,6.0,,,,,,...,0.1000,0.050,0.050,,,,,,2MASS J23513366+3127229,
11,1SWASP J1407 b,Confirmed,,,,20.0,6.0,6.0,,,...,0.0160,,,4400.0,100.0,100.0,,,,
15,2M 0103-55 (AB) b,Confirmed,13.0,1.0,1.0,,,,,,...,0.0300,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5296,ZTFJ2252-05 b,Confirmed,26.0,8.0,8.0,,,,0.49,0.04,...,,,,15200.0,900.0,900.0,,,,
5297,[BHB2007]-1 b,Confirmed,42.0,5.0,5.0,,,,,,...,0.0057,,,4060.0,300.0,300.0,,,,
5300,beta Cir b,Confirmed,56.0,7.0,7.0,,,,,,...,0.3670,,,8676.0,33.0,33.0,,,bet Cir,
5329,omi UMa A b,Confirmed,,,,4.1,,,,,...,,,,5242.0,10.0,10.0,,,omi UMa,


In [13]:
# Semi-minor axis for every planet. semi_minor_axis only uses arithmetic and
# np.sqrt, so it accepts whole columns: one vectorised call replaces the
# row-by-row `apply(axis=1)`. Rows with a missing semi-major axis or
# eccentricity, or with e > 1, come out as NaN.
exoplanets['semi_minor_axis'] = semi_minor_axis(
    exoplanets['semi_major_axis'], exoplanets['eccentricity']
)

  


### Luminosity

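For stars on the main sequence, luminosity scales approximately with mass as below (the code applies this with $M$ taken directly from `star_mass`, so $L$ is in solar luminosities only if `star_mass` is in solar masses):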
\begin{align}
    L \propto M^{3.5}
\end{align}

In [20]:
# Luminosity estimate from the mass-luminosity scaling L ~ M**3.5, computed
# as a single vectorised power on the column instead of a row-by-row
# `apply(axis=1)`. Missing masses stay NaN.
# NOTE(review): the scaling is stated for main-sequence stars only, but it is
# applied to every row here -- confirm this is acceptable for evolved hosts.
exoplanets['star_lum'] = exoplanets['star_mass'] ** 3.5

## Prepare dataset