In [1]:
# Import all necessary packages including astropy for constants in equations

# import packages
import numpy as np
import pandas as pd
import requests
import math
from astropy.constants import sigma_sb, L_sun
from bs4 import BeautifulSoup as BS

# Import visual packages
import matplotlib.pyplot as plt
import seaborn as sn
from matplotlib import cm

pd.set_option("display.max_columns", 101)

In [2]:
# Read CSV file that has gamma magnitudes 
exoplanet = pd.read_csv('exoplanet.csv')

In [3]:
exoplanet.columns

Index(['pl_name', 'hostname', 'sy_snum', 'sy_pnum', 'discoverymethod',
       'disc_year', 'disc_facility', 'pl_controv_flag', 'pl_orbper',
       'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_orbsmax',
       'pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim', 'pl_rade',
       'pl_radeerr1', 'pl_radeerr2', 'pl_radelim', 'pl_radj', 'pl_radjerr1',
       'pl_radjerr2', 'pl_radjlim', 'pl_bmasse', 'pl_bmasseerr1',
       'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassj', 'pl_bmassjerr1',
       'pl_bmassjerr2', 'pl_bmassjlim', 'pl_bmassprov', 'pl_orbeccen',
       'pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim', 'pl_insol',
       'pl_insolerr1', 'pl_insolerr2', 'pl_insollim', 'pl_eqt', 'pl_eqterr1',
       'pl_eqterr2', 'pl_eqtlim', 'ttv_flag', 'st_spectype', 'st_teff',
       'st_tefferr1', 'st_tefferr2', 'st_tefflim', 'st_rad', 'st_raderr1',
       'st_raderr2', 'st_radlim', 'st_mass', 'st_masserr1', 'st_masserr2',
       'st_masslim', 'st_met', 'st_meterr1', 'st_meterr2', 

In [4]:
# Dropped all columns that where related to one feature, it was the error associated witb that feature  
exoplanet.drop(columns = ['pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim','pl_radeerr1', 'pl_radeerr2', 'pl_radelim', 'pl_radjerr1',
       'pl_radjerr2', 'pl_radjlim', 'pl_bmasseerr1',
       'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassjerr1', 'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim',
       'pl_bmassjerr2', 'pl_bmassjlim','pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim','pl_insolerr1', 'pl_insolerr2', 'pl_insollim','pl_eqterr1',
       'pl_eqterr2', 'pl_eqtlim','st_tefferr1', 'st_tefferr2', 'st_tefflim','st_raderr1',
       'st_raderr2','st_masserr1', 'st_masserr2',
       'st_masslim','st_meterr1', 'st_meterr2', 'st_metlim','st_loggerr1', 'st_loggerr2', 'st_logglim','rastr','decstr','sy_disterr1', 'sy_disterr2','sy_vmagerr1', 'sy_vmagerr2', 'sy_kmagerr1',
       'sy_kmagerr2','sy_gaiamagerr1', 'sy_gaiamagerr2'], inplace = True)

In [6]:
#Webscaped table from kyoto database for exoplanets
url = 'http://www.exoplanetkyoto.org/exohtml/A_All_Exoplanets.html'
resp = requests.get(url)
bs = BS(resp.content, 'html.parser')
x = bs.table.findAll('td')
lst = []
for i in range(0,len(x),10):
    lst.append([f.text.strip() for f in x[i:i+10]])
    df = pd.DataFrame(lst,columns=lst[0])

In [7]:
# Dropped column name No from df dataframe that carries all the exoplanet info from kyoto website
df.drop(columns = ['No'], axis = 1,  inplace = True)

In [8]:
# Dropped the first row in the dataframe that carried all the column names 
df.drop(df.index[0], inplace = True)

In [10]:
# Feature engineering by converting the exoplanets orbits in days to years 
exoplanet['pl_orbper_yrs'] = exoplanet['pl_orbper'] / 365.25

In [11]:
# Fill the Nan values of the planets who have a semimajor axis but not orbit
exoplanet.loc[exoplanet.pl_orbsmax.isna(), 'pl_orbsmax'] = \
exoplanet.loc[exoplanet.pl_orbsmax.isna(), 'pl_orbper_yrs'].apply(lambda x: (x**2)**(1./ 3.))

In [12]:
# Filled in Nan values of planets who have a orbit but no semimajor axis
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbper'] = \
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbsmax'].apply(lambda x: (x**3)**(1./ 2.))

In [17]:
# Renamed the df dataframe to reflect the exoplanet dataframes column names 
df.rename(columns = {"Exoplanet's Name": 'pl_name', "Radius (Rjup)": 'pl_radj', "Mass   (Mjup)": 'pl_bmassj', "Semi Major Axis (AU)":'pl_orbsmax', "Orbital Period (yr)": 'pl_orbper'}, inplace = True)

In [19]:
# Inputting the specific semimajor axis for a specific exoplane from the df dataframe into exoplanet dataframe
exoplanet.loc[exoplanet.pl_name == "KMT-2019-BLG-1953L b", ['pl_orbsmax']] = df.loc[df.pl_name == "KMT-2019-BLG-1953L b", ['pl_orbsmax']].values

In [20]:

exoplanet.loc[exoplanet.pl_orbper.isna()]

Unnamed: 0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,pl_controv_flag,pl_orbper,pl_orbsmax,pl_rade,pl_radj,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,ttv_flag,st_spectype,st_teff,st_rad,st_radlim,st_mass,st_met,st_metratio,st_logg,ra,dec,sy_dist,sy_vmag,sy_kmag,sy_gaiamag,pl_orbper_yrs
1457,KMT-2019-BLG-1953L b,KMT-2019-BLG-1953L,1,1,Microlensing,2020,KMTNet,0,,0.8,14.1,1.26,200.0,0.64,Mass,,,,0,,,,,0.31,,,,269.11625,-28.201111,7040.0,,,,
4026,SR 12 AB c,SR 12 AB,2,1,Imaging,2010,Infrared Survey Facility,0,,,12.3,1.1,4131.62,13.0,Mass,,,,0,,3828.93,1.1,0.0,,,,,246.831236,-24.694669,112.317216,13.355,8.408,12.1811,


In [21]:
# fill the na for this specific exoplanet from its semimajor axis 
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbper'] = \
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbsmax'].apply(lambda x: (x**3)**(1./ 2.) if type(x) == float else x)

In [22]:
miss_radj = exoplanet.loc[exoplanet.pl_radj.isna(), 'pl_name'].values

In [23]:
rep_radj = dict(zip(miss_radj, df.loc[df.pl_name.isin(miss_radj), 'pl_radj'].values))
rep_radj

{'HD 158259 b': '0.1070',
 'HD 202696 b': '0.9078',
 'HD 202696 c': '0.9078',
 'KIC 10001893 b': '0.0000',
 'KIC 10001893 c': '0.0000',
 'KIC 10001893 d': '0.0000',
 'Kepler-160 d': '0.9078',
 'Kepler-37 e': '0.9078',
 'Kepler-93 c': '0.9078',
 'Kepler-97 c': '0.9078'}

In [24]:
exoplanet.loc[exoplanet.pl_name.isin(rep_radj.keys()), 'pl_radj'] = exoplanet[exoplanet.pl_name.isin(rep_radj.keys())].pl_name.apply(lambda x: rep_radj[x])

In [25]:
exoplanet[(exoplanet.pl_name.isin(miss_radj)) & (exoplanet.pl_radj.isna())]

Unnamed: 0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,pl_controv_flag,pl_orbper,pl_orbsmax,pl_rade,pl_radj,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,ttv_flag,st_spectype,st_teff,st_rad,st_radlim,st_mass,st_met,st_metratio,st_logg,ra,dec,sy_dist,sy_vmag,sy_kmag,sy_gaiamag,pl_orbper_yrs
3871,LkCa 15 b,LkCa 15,1,2,Imaging,2015,Large Binocular Telescope Observatory,1,56.3607,14.7,,,,,Msini,,,,0,,4194.0,1.61,0.0,,0.26,[Fe/H],3.65,69.824179,22.350866,158.152,12.025,8.163,11.5887,
3872,LkCa 15 c,LkCa 15,1,2,Imaging,2015,Large Binocular Telescope Observatory,1,80.2176,18.6,,,,,Msini,,,,0,,4194.0,1.61,0.0,,0.26,[Fe/H],3.65,69.824179,22.350866,158.152,12.025,8.163,11.5887,
4005,PSR J1719-1438 b,PSR J1719-1438,1,1,Pulsar Timing,2011,Parkes Observatory,0,0.0907063,0.0044,,,382.8,1.2,Mass,0.06,,,0,,4500.0,0.04,0.0,1.4,,,,259.791971,-14.6336,1200.0,,,,0.000248
4124,UZ For b,UZ For,2,2,Eclipse Timing Variations,2011,Multiple Observatories,0,5840.0,5.9,,,2002.2,6.3,Mass,0.04,,,0,,5286.9,,,0.7,,,,53.869468,-25.739386,239.603875,16.7753,14.776,16.6937,15.989049
4125,UZ For c,UZ For,2,2,Eclipse Timing Variations,2011,Multiple Observatories,0,1916.25,2.8,,,2447.2,7.7,Mass,0.05,,,0,,5286.9,,,0.7,,,,53.869468,-25.739386,239.603875,16.7753,14.776,16.6937,5.246407
4256,WASP-53 c,WASP-53,1,2,Radial Velocity,2017,Multiple Facilities,0,2840.0,3.73,,,5196.5205,16.35,Msini,0.8369,,,0,,4953.0,0.8,0.0,0.84,0.22,[Fe/H],4.55,31.909164,-20.661843,201.338,12.591,10.39,12.3473,7.775496


In [56]:
miss_massj = exoplanet.loc[exoplanet.pl_bmassj.isna(), 'pl_name'].values

In [57]:
rep_massj = dict(zip(miss_massj, df.loc[df.pl_name.isin(miss_massj), 'pl_bmassj'].values))
rep_massj

{'K2-253 b': '1.3406',
 'KOI-217 b': '1.3442',
 'Kepler-302 c': '1.6518',
 'Kepler-468 b': '1.0372',
 'Kepler-487 b': '1.0994',
 'Kepler-490 b': '1.2008',
 'Kepler-548 b': '0.9773',
 'Kepler-553 c': '1.5942',
 'Kepler-670 b': '1.2486',
 'Kepler-686 b': '1.9562',
 'Kepler-699 b': '1.5902',
 'Kepler-706 b': '1.2625',
 'Kepler-723 b': '1.3047',
 'Kepler-730 b': '1.8599',
 'Kepler-731 b': '1.4181',
 'Kepler-762 b': '1.5380',
 'Kepler-785 b': '1.4144'}

In [58]:
exoplanet.loc[exoplanet.pl_name.isin(rep_massj.keys()), 'pl_bmassj'] = exoplanet[exoplanet.pl_name.isin(rep_massj.keys())].pl_name.apply(lambda x: rep_massj[x])

In [61]:
exoplanet

Unnamed: 0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,pl_controv_flag,pl_orbper,pl_orbsmax,pl_rade,pl_radj,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,ttv_flag,st_spectype,st_teff,st_rad,st_radlim,st_mass,st_met,st_metratio,st_logg,ra,dec,sy_dist,sy_vmag,sy_kmag,sy_gaiamag,pl_orbper_yrs
0,11 Com b,11 Com,2,1,Radial Velocity,2007,Xinglong Station,0,326.03,1.29,12.1,1.08,6165.6000,19.4,Msini,0.2310,,,0,G8 III,4742.00,19.00,0.0,2.70,-0.350,[Fe/H],2.31,185.178779,17.793252,93.1846,4.72307,2.282,4.44038,0.892621
1,11 UMi b,11 UMi,1,1,Radial Velocity,2009,Thueringer Landessternwarte Tautenburg,0,516.22,1.53,12.3,1.09,4684.8142,14.74,Msini,0.0800,,,0,K4 III,4213.00,29.79,0.0,2.78,-0.020,[Fe/H],1.93,229.274595,71.823943,125.3210,5.01300,1.939,4.56216,1.413333
2,14 And b,14 And,1,1,Radial Velocity,2008,Okayama Astrophysical Observatory,0,185.84,0.83,12.9,1.15,1525.5000,4.8,Msini,0.0000,,,0,K0 III,4813.00,11.00,0.0,2.20,-0.240,[Fe/H],2.63,352.824150,39.235837,75.4392,5.23133,2.331,4.91781,0.508802
3,14 Her b,14 Her,1,1,Radial Velocity,2002,W. M. Keck Observatory,0,1773.4,2.93,12.9,1.15,1481.0878,4.66,Msini,0.3700,,,0,K0 V,5338.00,0.93,0.0,0.90,0.410,[Fe/H],4.45,242.602101,43.816362,17.9323,6.61935,4.714,6.38300,4.855305
4,16 Cyg B b,16 Cyg B,3,1,Radial Velocity,1996,Multiple Observatories,0,798.5,1.66,13.5,1.2,565.7374,1.78,Msini,0.6800,,,0,G3 V,5750.00,1.13,0.0,1.08,0.060,[Fe/H],4.36,295.465642,50.516824,21.1397,6.21500,4.651,6.06428,2.186174
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4362,tau Gem b,tau Gem,1,1,Radial Velocity,2013,Lick Observatory,0,305.5,1.17,12.1,1.08,6547.0000,20.6,Msini,0.0310,,,0,K2 III,4388.00,26.80,0.0,2.30,0.140,[Fe/H],1.96,107.784723,30.244954,112.5370,4.37799,1.681,4.03996,0.836413
4363,ups And b,ups And,2,3,Radial Velocity,1996,Lick Observatory,0,4.61703,0.059222,14,1.25,218.5310,0.6876,Msini,0.0215,,,0,F8 V,6105.51,1.56,0.0,1.30,0.101,[M/H],4.07,24.198353,41.403815,13.4054,4.09565,2.859,3.98687,0.012641
4364,ups And c,ups And,2,3,Radial Velocity,1999,Multiple Observatories,0,241.258,0.827774,13.4,1.19,629.5950,1.981,Msini,0.2596,,,0,F8 V,6105.51,1.56,0.0,1.30,0.101,[M/H],4.07,24.198353,41.403815,13.4054,4.09565,2.859,3.98687,0.660528
4365,ups And d,ups And,2,3,Radial Velocity,1999,Multiple Observatories,0,1276.46,2.51329,13,1.16,1313.2200,4.132,Msini,0.2987,,,0,F8 V,6105.51,1.56,0.0,1.30,0.101,[M/H],4.07,24.198353,41.403815,13.4054,4.09565,2.859,3.98687,3.494757


In [31]:
exoplanet.loc[exoplanet.pl_rade.isna(), 'pl_rade'] = \
exoplanet.loc[exoplanet.pl_rade.isna(), 'pl_radj'].apply(lambda x: x * 11)

In [33]:
exoplanet.loc[exoplanet.pl_orbsmax.isna(), 'pl_orbsmax'] = 1083.0

In [36]:
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbper'] = \
exoplanet.loc[exoplanet.pl_orbper.isna(), 'pl_orbsmax'].apply(lambda x: (x**3)**(1./2.))

In [68]:
exoplanet.loc[exoplanet.pl_bmasse.isna(), 'pl_bmasse'] = \
exoplanet.loc[exoplanet.pl_bmasse.isna(), 'pl_bmassj'].apply(lambda x : x * (317.83) if type(x) == float else x)

In [73]:
exoplanet.loc[exoplanet.pl_rade.isna()]

Unnamed: 0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,pl_controv_flag,pl_orbper,pl_orbsmax,pl_rade,pl_radj,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,ttv_flag,st_spectype,st_teff,st_rad,st_radlim,st_mass,st_met,st_metratio,st_logg,ra,dec,sy_dist,sy_vmag,sy_kmag,sy_gaiamag,pl_orbper_yrs
3871,LkCa 15 b,LkCa 15,1,2,Imaging,2015,Large Binocular Telescope Observatory,1,56.3607,14.7,,,1.2486,1.2486,Msini,,,,0,,4194.0,1.61,0.0,,0.26,[Fe/H],3.65,69.824179,22.350866,158.152,12.025,8.163,11.5887,
3872,LkCa 15 c,LkCa 15,1,2,Imaging,2015,Large Binocular Telescope Observatory,1,80.2176,18.6,,,1.9562,1.9562,Msini,,,,0,,4194.0,1.61,0.0,,0.26,[Fe/H],3.65,69.824179,22.350866,158.152,12.025,8.163,11.5887,
4005,PSR J1719-1438 b,PSR J1719-1438,1,1,Pulsar Timing,2011,Parkes Observatory,0,0.0907063,0.0044,,,382.8,1.5902,Mass,0.06,,,0,,4500.0,0.04,0.0,1.4,,,,259.791971,-14.6336,1200.0,,,,0.000248
4124,UZ For b,UZ For,2,2,Eclipse Timing Variations,2011,Multiple Observatories,0,5840.0,5.9,,,2002.2,1.2625,Mass,0.04,,,0,,5286.9,,,0.7,,,,53.869468,-25.739386,239.603875,16.7753,14.776,16.6937,15.989049
4125,UZ For c,UZ For,2,2,Eclipse Timing Variations,2011,Multiple Observatories,0,1916.25,2.8,,,2447.2,1.3047,Mass,0.05,,,0,,5286.9,,,0.7,,,,53.869468,-25.739386,239.603875,16.7753,14.776,16.6937,5.246407
4256,WASP-53 c,WASP-53,1,2,Radial Velocity,2017,Multiple Facilities,0,2840.0,3.73,,,5196.52,1.8599,Msini,0.8369,,,0,,4953.0,0.8,0.0,0.84,0.22,[Fe/H],4.55,31.909164,-20.661843,201.338,12.591,10.39,12.3473,7.775496


In [78]:
exoplanet.loc[(exoplanet.pl_name == 'WASP-53 c') & (exoplanet.pl_radj.isna()), 'pl_radj'] = 0.9078

In [81]:
exoplanet.loc[exoplanet.pl_rade.isna(), 'pl_rade'] = 0.000

In [84]:
exoplanet.loc[exoplanet.pl_radj.isna(), 'pl_radj'] = 0.000

In [87]:
exoplanet.loc[exoplanet.pl_name == 'WASP-53 c', 'pl_rade'] = \
exoplanet.loc[exoplanet.pl_name == 'WASP-53 c', 'pl_radj'].apply(lambda x: x * 11.2 if type(x) == float else x)	

In [88]:
exoplanet.loc[exoplanet.pl_name == 'WASP-53 c']

Unnamed: 0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,pl_controv_flag,pl_orbper,pl_orbsmax,pl_rade,pl_radj,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,ttv_flag,st_spectype,st_teff,st_rad,st_radlim,st_mass,st_met,st_metratio,st_logg,ra,dec,sy_dist,sy_vmag,sy_kmag,sy_gaiamag,pl_orbper_yrs
4256,WASP-53 c,WASP-53,1,2,Radial Velocity,2017,Multiple Facilities,0,2840,3.73,10.1674,0.9078,5196.52,1.8599,Msini,0.8369,,,0,,4953.0,0.8,0.0,0.84,0.22,[Fe/H],4.55,31.909164,-20.661843,201.338,12.591,10.39,12.3473,7.775496


In [89]:
exoplanet.isna().sum()

pl_name               0
hostname              0
sy_snum               0
sy_pnum               0
discoverymethod       0
disc_year             0
disc_facility         0
pl_controv_flag       0
pl_orbper             0
pl_orbsmax            0
pl_rade               0
pl_radj               0
pl_bmasse             0
pl_bmassj             0
pl_bmassprov          0
pl_orbeccen         478
pl_insol           1581
pl_eqt             1106
ttv_flag              0
st_spectype        2810
st_teff             114
st_rad              132
st_radlim           132
st_mass               4
st_met              276
st_metratio         304
st_logg             139
ra                    0
dec                   0
sy_dist               5
sy_vmag             112
sy_kmag             110
sy_gaiamag          153
pl_orbper_yrs       140
dtype: int64