# Host Star Classifications and Graphs

In [8]:
# Importing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
# Load the exoplanet table from its CSV file into a DataFrame.
dataset = pd.read_csv(
    filepath_or_buffer='filtered_exoplanet_dataset_removed_outliers.csv',
)

In [10]:
# Inspect the column names available in the loaded dataset.
dataset.columns

Index(['planet_name', 'host_star_name', 'orbital_period_day',
       'orbit_semi_major_axis_au', 'planet_rad_earth_rad',
       'planet_rad_jup_rad', 'planet_mass_earth_mass', 'planet_mass_jup_mass',
       'Eccentricity', 'planet_equil_temp_k', 'spectral_type',
       'stellar_effective_temp_k', 'stellar_rad_solar_rad',
       'stellar_mass_solar_mass', 'distance_parsec', 'stellar_surf_grav',
       'declination_deg', 'discovery_method'],
      dtype='object')

In [11]:
# Drop the columns that are not needed in this notebook.
# A single drop() with errors='ignore' replaces the repeated `del` statements so
# the cell is idempotent: re-running it after the columns are already gone does
# not raise a KeyError, and `dataset` is rebound instead of mutated in place.
unneeded_columns = [
    'planet_rad_earth_rad',
    'planet_rad_jup_rad',
    'planet_mass_earth_mass',
    'planet_mass_jup_mass',
    'Eccentricity',
    'planet_equil_temp_k',
    'declination_deg',
    'discovery_method',
]
dataset = dataset.drop(columns=unneeded_columns, errors='ignore')

In [12]:
# Preview the first 10 rows of the remaining columns.
dataset.head(10)

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav
0,11 Com b,11 Com,324.62,1.226,G8 III,4808.0,16.38,2.463333,93.1846,2.38
1,11 UMi b,11 UMi,516.219985,1.526667,K4 III,4276.5,26.935,2.093333,125.321,1.765
2,14 And b,14 And,186.3,0.761667,K0 III,4850.5,11.275,1.726667,75.4392,2.59
3,14 Her b,14 Her,1766.378417,2.81475,K0 V,5296.985,0.976667,0.927143,17.9323,4.42
4,16 Cyg B b,16 Cyg B,799.375,1.662833,G2.5 V,5728.594,1.14,1.016,21.1397,4.344
5,17 Sco b,17 Sco,578.38,1.45,K3 III,4157.0,25.92,1.22,124.953,1.7
6,18 Del b,18 Del,988.075,2.538667,G6 III,4979.5,8.65,2.166667,76.222,2.845
7,1RXS J160929.1-210524 b,1RXS J160929.1-210524,,330.0,K7 V,4043.25,1.33,0.7925,139.135,4.0
8,24 Boo b,24 Boo,30.3403,0.192,G3 IV,4854.5,11.415,1.02,95.9863,2.35
9,24 Sex b,24 Sex,452.8,1.333,,5098.0,4.9,1.54,72.0691,3.5


In [14]:
# (rows, columns) of the dataset before filtering by spectral type.
dataset.shape

(5630, 10)

In [19]:
# Filter the dataset so it only contains main sequence star types.
# A row is kept when its spectral_type ends with 'V' but does not end with 'IV':
# types such as 'G3 IV' also end in 'V', so they are excluded explicitly.
# na=False makes rows with a missing spectral_type evaluate to False in both
# masks, which means they are dropped by the filter.
spectral_type = dataset['spectral_type']
ends_with_v = spectral_type.str.endswith('V', na=False)
ends_with_iv = spectral_type.str.endswith('IV', na=False)

# .copy() makes filtered_dataset an independent DataFrame, so adding or
# modifying columns on it later does not trigger SettingWithCopyWarning.
filtered_dataset = dataset[ends_with_v & ~ends_with_iv].copy()

In [20]:
# Preview the first 10 rows that passed the spectral-type filter.
filtered_dataset.head(10)

Unnamed: 0,planet_name,host_star_name,orbital_period_day,orbit_semi_major_axis_au,spectral_type,stellar_effective_temp_k,stellar_rad_solar_rad,stellar_mass_solar_mass,distance_parsec,stellar_surf_grav
3,14 Her b,14 Her,1766.378417,2.81475,K0 V,5296.985,0.976667,0.927143,17.9323,4.42
4,16 Cyg B b,16 Cyg B,799.375,1.662833,G2.5 V,5728.594,1.14,1.016,21.1397,4.344
7,1RXS J160929.1-210524 b,1RXS J160929.1-210524,,330.0,K7 V,4043.25,1.33,0.7925,139.135,4.0
11,2M0437 b,2MASS J04372171+2651014,,118.0,M V,3100.0,0.84,0.17,128.484,4.5
13,2MASS J01225093-2439505 b,2MASS J01225093-2439505,,52.0,M3.5 V,3419.5,0.37,0.375,33.8281,4.86
15,2MASS J0249-0557 c,2MASS J0249-0557 A,,1950.0,M6 V,,,0.05,66.0613,
23,2MASS J22362452+4751425 b,2MASS J22362452+4751425,,230.0,K7 V,4045.0,,0.6,69.5737,
24,30 Ari B b,30 Ari B,338.533337,0.998333,F6 V,6315.5,1.223333,1.416667,44.657,4.465
27,47 UMa b,47 UMa,1083.04,2.0958,G0 V,5861.053333,1.175,1.043333,13.7967,4.315
28,47 UMa c,47 UMa,4088.0,4.911333,G1 V,5850.58,1.175,1.035,13.7967,4.315


In [21]:
# (rows, columns) of the dataset after filtering by spectral type.
filtered_dataset.shape

(1009, 10)

In [22]:
# Optional export of the filtered rows for a closer look (intentionally left disabled).
# Uncomment the line below to write host_star_classification.csv without the index column.
#filtered_dataset.to_csv('host_star_classification.csv', index=False)