# Identifying the Best Earth Match Among Possibly Habitable Exoplanets

In [1]:
# importing
# Data handling (pandas, numpy) and plotting (seaborn, matplotlib) libraries.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import warnings
# NOTE(review): this silences every warning category for the whole session,
# which also hides any pandas chained-assignment or deprecation warnings
# raised by later cells; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# read the exoplanet table from CSV into a DataFrame
# (the file name suggests it was already filtered and had outliers removed)
dataset = pd.read_csv(
    filepath_or_buffer='filtered_exoplanet_dataset_removed_outliers.csv',
)

In [3]:
# preview the loaded table as the cell's output
dataset

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
0,11 Com b,11 Com,324.620000,1.226000,,,5505.066163,17.321333,0.234500,,G8 III,4808.000000,16.380000,2.463333,93.1846,2.3800,17.793252,Radial Velocity
1,11 UMi b,11 UMi,516.219985,1.526667,,,3818.094733,12.013333,0.080000,,K4 III,4276.500000,26.935000,2.093333,125.3210,1.7650,71.823943,Radial Velocity
2,14 And b,14 And,186.300000,0.761667,,,1224.550433,3.853000,0.000000,,K0 III,4850.500000,11.275000,1.726667,75.4392,2.5900,39.235837,Radial Velocity
3,14 Her b,14 Her,1766.378417,2.814750,,,1642.383591,5.167625,0.362925,,K0 V,5296.985000,0.976667,0.927143,17.9323,4.4200,43.816362,Radial Velocity
4,16 Cyg B b,16 Cyg B,799.375000,1.662833,,,533.514528,1.678667,0.676033,,G2.5 V,5728.594000,1.140000,1.016000,21.1397,4.3440,50.516824,Radial Velocity
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5625,ups And b,ups And,4.616229,0.058895,,,219.960419,0.692086,0.030200,,F8 V,6137.820000,1.615000,1.250000,13.4054,4.1625,41.403815,Radial Velocity
5626,ups And c,ups And,240.728533,0.827712,,,1275.328758,4.012667,0.238933,,F8 V,6117.093333,1.615000,1.262500,13.4054,4.1500,41.403815,Radial Velocity
5627,ups And d,ups And,1285.346167,2.528382,,,1616.242590,5.085333,0.281117,,F8 V,6117.093333,1.615000,1.262500,13.4054,4.1500,41.403815,Radial Velocity
5628,ups Leo b,ups Leo,385.200000,1.180000,,,162.092490,0.510000,0.320000,,G9 III,4836.000000,11.220000,1.480000,52.5973,2.4600,-0.823564,Radial Velocity


In [4]:
# look up the row for Kepler-452 b by its planet name
dataset.loc[dataset['planet_name'].eq('Kepler-452 b')]

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
3999,Kepler-452 b,Kepler-452,384.844831,1.009733,1.3722,0.138,,,0.0,235.0,G2,5687.166667,0.998333,0.97,551.727,4.446,44.277586,Transit


In [5]:
# Fill in the Earth-mass value for Kepler-452 b by hand.
# Only 'planet_mass_earth_mass' is written; 'planet_mass_jup_mass' is left as is.
# NOTE(review): the source of the 3.29 figure is not recorded here — cite it.
kepler452b_mass = 3.29  # Earth masses
dataset.loc[
    dataset['planet_name'].eq('Kepler-452 b'),
    'planet_mass_earth_mass',
] = kepler452b_mass

In [6]:
# show the Kepler-452 b row again to confirm the mass was written
dataset.loc[dataset['planet_name'] == 'Kepler-452 b', :]

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
3999,Kepler-452 b,Kepler-452,384.844831,1.009733,1.3722,0.138,3.29,,0.0,235.0,G2,5687.166667,0.998333,0.97,551.727,4.446,44.277586,Transit


In [7]:
# removing exoplanets which are not orbiting a main sequence star, sub giant, or red giant
# TODO: revisit this list once the final dataset is complete
# Rows are matched on the 'planet_name' column (as the other cells do) rather
# than on column position, so the filter still works if the CSV carries an
# extra leading column such as a saved index.
planets_to_drop = [
    'NN Ser c',
    'NN Ser d',
    'CFBDSIR J145829+101343 b',
    'WISE J033605.05-014350.4 b',
    'WISEP J121756.91+162640.2 A b',
]
# a single membership test replaces five separate != filters
dataset = dataset[~dataset['planet_name'].isin(planets_to_drop)]

### Filtering exoplanets to only include those which are orbiting a sun-like (Type G) star

In [8]:
# filling in the missing values for the spectral type column with 'Unknown' to allow for filtering computation
# TODO: revisit once the final dataset is done
# The filled column is assigned back instead of calling
# fillna(..., inplace=True) on dataset['spectral_type']: that in-place call
# goes through a chained selection, which pandas warns about and which leaves
# `dataset` unchanged when Copy-on-Write is enabled.
dataset = dataset.assign(spectral_type=dataset['spectral_type'].fillna('Unknown'))

dataset.head(20)

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
0,11 Com b,11 Com,324.62,1.226,,,5505.066163,17.321333,0.2345,,G8 III,4808.0,16.38,2.463333,93.1846,2.38,17.793252,Radial Velocity
1,11 UMi b,11 UMi,516.219985,1.526667,,,3818.094733,12.013333,0.08,,K4 III,4276.5,26.935,2.093333,125.321,1.765,71.823943,Radial Velocity
2,14 And b,14 And,186.3,0.761667,,,1224.550433,3.853,0.0,,K0 III,4850.5,11.275,1.726667,75.4392,2.59,39.235837,Radial Velocity
3,14 Her b,14 Her,1766.378417,2.81475,,,1642.383591,5.167625,0.362925,,K0 V,5296.985,0.976667,0.927143,17.9323,4.42,43.816362,Radial Velocity
4,16 Cyg B b,16 Cyg B,799.375,1.662833,,,533.514528,1.678667,0.676033,,G2.5 V,5728.594,1.14,1.016,21.1397,4.344,50.516824,Radial Velocity
5,17 Sco b,17 Sco,578.38,1.45,,,1373.01872,4.32,0.06,,K3 III,4157.0,25.92,1.22,124.953,1.7,-11.837791,Radial Velocity
6,18 Del b,18 Del,988.075,2.538667,,,3109.01538,9.782333,0.052,,G6 III,4979.5,8.65,2.166667,76.222,2.845,10.839138,Radial Velocity
7,1RXS J160929.1-210524 b,1RXS J160929.1-210524,,330.0,18.647,1.664,3135.75,9.0,,1750.0,K7 V,4043.25,1.33,0.7925,139.135,4.0,-21.08314,Imaging
8,24 Boo b,24 Boo,30.3403,0.192,,,284.93389,0.8965,0.037,,G3 IV,4854.5,11.415,1.02,95.9863,2.35,49.844649,Radial Velocity
9,24 Sex b,24 Sex,452.8,1.333,,,632.46,1.99,0.09,,Unknown,5098.0,4.9,1.54,72.0691,3.5,-0.9024,Radial Velocity


In [9]:
# filtering exoplanets to only include those which are orbiting a sun-like (Type G) star
# na=False counts a missing spectral type as "not G", so the mask is always
# boolean and the row selection cannot fail on NaN entries.
is_g_type = dataset['spectral_type'].str.startswith('G', na=False)
filtered_dataset = dataset[is_g_type]

filtered_dataset

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
0,11 Com b,11 Com,324.620000,1.226000,,,5505.066163,17.321333,0.234500,,G8 III,4808.000000,16.380000,2.463333,93.1846,2.380000,17.793252,Radial Velocity
4,16 Cyg B b,16 Cyg B,799.375000,1.662833,,,533.514528,1.678667,0.676033,,G2.5 V,5728.594000,1.140000,1.016000,21.1397,4.344000,50.516824,Radial Velocity
6,18 Del b,18 Del,988.075000,2.538667,,,3109.015380,9.782333,0.052000,,G6 III,4979.500000,8.650000,2.166667,76.2220,2.845000,10.839138,Radial Velocity
8,24 Boo b,24 Boo,30.340300,0.192000,,,284.933890,0.896500,0.037000,,G3 IV,4854.500000,11.415000,1.020000,95.9863,2.350000,49.844649,Radial Velocity
27,47 UMa b,47 UMa,1083.040000,2.095800,,,812.223832,2.555600,0.041200,,G0 V,5861.053333,1.175000,1.043333,13.7967,4.315000,40.430493,Radial Velocity
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5612,pi Men c,HD 39091,6.267966,0.068137,2.051,0.183,4.116528,0.012953,0.019250,1158.5,G0 V,5943.750000,1.122500,1.060000,18.2702,4.380000,-80.464604,Transit
5614,psi1 Dra B b,psi1 Dra B,3117.000000,4.430000,,,486.279900,1.530000,0.400000,,G0 V,6212.000000,,1.190000,22.7188,4.200000,72.155711,Radial Velocity
5615,rho CrB b,rho CrB,39.794460,0.223850,,,341.261020,1.073725,0.034700,614.0,G0 V,5747.827500,1.342500,0.937500,17.4671,4.166667,33.300186,Radial Velocity
5616,rho CrB c,rho CrB,102.429000,0.416200,,,27.109270,0.085297,0.064000,448.0,G0 V,5738.103333,1.353333,0.916667,17.4671,4.166667,33.300186,Radial Velocity


In [10]:
# confirm Kepler-452 b is still present after the G-type filter
kepler_452b_present = bool(filtered_dataset['planet_name'].eq('Kepler-452 b').any())
print(f"Is Kepler-452 b in the filtered dataset? {kepler_452b_present}")

Is Kepler-452 b in the filtered dataset? True


### Filtering on mass and radius to keep Earth-like and Super-Earth-like planets

In [11]:
# Earth-like / super-Earth-like cut: radius at most 2 Earth radii AND mass at most 8 Earth masses
radius_ok = filtered_dataset['planet_rad_earth_rad'].le(2)
mass_ok = filtered_dataset['planet_mass_earth_mass'].le(8)
earth_and_super_earth_condition = radius_ok & mass_ok

# keep only the rows that satisfy both limits
filtered_dataset2 = filtered_dataset.loc[earth_and_super_earth_condition]

# show the filtered dataset
filtered_dataset2


Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
120,CoRoT-7 b,CoRoT-7,0.853629,0.017023,1.6525,0.147333,7.249359,0.022806,0.03,1833.0,G9 V,5266.75,0.862857,0.8925,159.906,4.51,-1.063008,Transit
624,HD 136352 b,HD 136352,11.579387,0.095567,1.573,0.14,4.716667,0.01483,0.073,908.0,G4 V,5726.0,1.035,0.863333,14.682,4.386667,-48.318817,Radial Velocity
631,HD 137496 b,HD 137496,1.62116,0.02732,1.31,0.117,4.04,0.01271,0.0,2130.0,G,5799.0,1.59,1.04,155.317,4.05,-16.509001,Transit
1174,HD 80653 b,HD 80653,0.719581,0.016605,1.613,0.144,5.66,0.01781,0.0,2463.0,G2,6019.5,1.22,1.18,109.86,4.34,14.367869,Transit
1218,HD 93963 A b,HD 93963 A,1.039135,0.02085,1.35,0.12,7.8,0.02454,,2042.0,G0 V,5987.0,1.04,1.11,82.3432,4.49,25.641065,Transit
1352,K2-111 b,K2-111,5.351605,0.058533,1.7105,0.1525,6.49,0.02042,0.076667,1309.0,G2,5761.8,1.181667,0.86,200.394,4.245,21.298526,Transit
1385,K2-138 b,K2-138,2.353173,0.033817,1.523333,0.136,3.1,0.00975,0.2255,1157.0,G8 V,5339.1,0.853333,0.93,202.585,4.556667,-10.849739,Transit
1567,K2-265 b,K2-265,2.368974,0.03376,1.6584,0.1484,6.54,0.02058,0.084,,G8 V,5424.4,0.905,0.93,138.671,4.4675,-14.494779,Transit
1999,Kepler-101 c,Kepler-101,6.029708,0.067486,1.076556,0.1075,3.78,0.01,0.0,1145.666667,G3 IV,5655.104,1.421,1.0875,927.981,4.204444,48.355247,Transit
3999,Kepler-452 b,Kepler-452,384.844831,1.009733,1.3722,0.138,3.29,,0.0,235.0,G2,5687.166667,0.998333,0.97,551.727,4.446,44.277586,Transit


In [12]:
# confirm Kepler-452 b is still present after the mass/radius filter
kepler_452b_present = bool(filtered_dataset2['planet_name'].isin(['Kepler-452 b']).any())
print(f"Is Kepler-452 b in the filtered dataset? {kepler_452b_present}")

Is Kepler-452 b in the filtered dataset? True


### Filtering based on the Distance between Exoplanet and its Host Star

In [13]:
# orbital-distance cut: semi-major axis from 0.75 AU to 1.5 AU, both limits included
orbit_semi_major_axis_condtion = filtered_dataset2['orbit_semi_major_axis_au'].between(0.75, 1.5)

# narrow the latest filtered dataset to the rows inside that range
filtered_dataset3 = filtered_dataset2.loc[orbit_semi_major_axis_condtion]

# display the latest filtered dataset
filtered_dataset3

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
3999,Kepler-452 b,Kepler-452,384.844831,1.009733,1.3722,0.138,3.29,,0.0,235.0,G2,5687.166667,0.998333,0.97,551.727,4.446,44.277586,Transit


### Filtering Based on the Orbital Period

In [14]:
# orbital-period window used here as the habitable zone of a G-type star:
# from 200 Earth days (lower limit, Venus) to 700 Earth days (upper limit, Mars), both limits included
orbital_period_condtion = filtered_dataset3['orbital_period_day'].between(200, 700)

# narrow the latest filtered dataset to the rows inside that window
filtered_dataset4 = filtered_dataset3.loc[orbital_period_condtion]

# display the latest filtered dataset
filtered_dataset4


Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,planet_rad_earth_rad,planet_rad_jup_rad,planet_mass_earth_mass,planet_mass_jup_mass,Eccentricity,planet_equil_temp_k,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav,declination_deg,discovery_method
3999,Kepler-452 b,Kepler-452,384.844831,1.009733,1.3722,0.138,3.29,,0.0,235.0,G2,5687.166667,0.998333,0.97,551.727,4.446,44.277586,Transit
