## Planet classification
We classify the planets into different categories through threshold defined by NASA

In [8]:
# imports and stuff
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

In [9]:
# Import pre-processed dataset from preproc1 (fully aggregated planets)
nasa = pd.read_csv(
    '../data/nasa_aggregated.csv',
)

display(nasa)
display(nasa.columns.values)

Unnamed: 0,pl_name,hostname,pl_orbper,pl_orbsmax,pl_orbeccen,pl_rade,pl_bmasse,pl_masse,pl_dens,pl_insol,...,pl_orbincl,st_teff,st_rad,st_mass,st_lum,st_met,st_logg,sy_dist,discoverymethod,disc_year
0,11 Com b,11 Com,323.210000,1.17800,0.2380,,4914.89849,,,,...,,4874.00,13.76,2.09,1.978,-0.260,2.45,93.1846,Radial Velocity,2007
1,11 UMi b,11 UMi,516.219970,1.53000,0.0800,,4684.81420,,,,...,,4213.00,29.79,2.78,,-0.020,1.93,125.3210,Radial Velocity,2009
2,14 And b,14 And,186.760000,0.77500,0.0000,,1131.15130,,,,...,,4888.00,11.55,1.78,1.840,-0.210,2.55,75.4392,Radial Velocity,2008
3,14 Her b,14 Her,1765.038900,2.77400,0.3730,,2559.47216,2559.47216,,,...,144.652,5314.94,1.00,0.91,-0.153,0.405,4.43,17.9323,Radial Velocity,2002
4,16 Cyg B b,16 Cyg B,799.450000,1.67600,0.6832,,556.83537,,,,...,,5711.97,1.16,0.98,0.097,0.074,4.30,21.1397,Radial Velocity,1996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5798,ups And b,ups And,4.617122,0.05914,0.0069,,214.53417,,,,...,,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1996
5799,ups And c,ups And,241.223000,0.82650,0.2660,,624.53282,4443.24113,,,...,,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999
5800,ups And d,ups And,1282.410000,2.51700,0.2940,,1303.09647,3257.74117,,,...,,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999
5801,ups Leo b,ups Leo,385.200000,1.18000,0.3200,,162.09249,,,,...,,4836.00,11.22,1.48,1.800,-0.200,2.46,52.5973,Radial Velocity,2021


array(['pl_name', 'hostname', 'pl_orbper', 'pl_orbsmax', 'pl_orbeccen',
       'pl_rade', 'pl_bmasse', 'pl_masse', 'pl_dens', 'pl_insol',
       'pl_eqt', 'pl_orbincl', 'st_teff', 'st_rad', 'st_mass', 'st_lum',
       'st_met', 'st_logg', 'sy_dist', 'discoverymethod', 'disc_year'],
      dtype=object)

In [10]:
def classify_planet(radius, mass, temp):
    missing_values = sum(x is None for x in [radius, mass, temp])

    # Completely unknown if more than one value is missing
    if missing_values > 1:
        return "Unknown"

    # Partially known if exactly one value is missing
    if missing_values == 1:
        return "Partially Known"

    # Fully known classification
    if radius < 0.5 and mass < 0.5 and 150 <= temp <= 400:
        return "Sub-Earth"
    elif 0.5 <= radius <= 1.5 and 0.5 <= mass <= 5 and 180 <= temp <= 370:
        return "Earth-like"
    elif 1.5 < radius <= 2.5 and 5 < mass <= 10 and 100 <= temp < 180:
        return "Cold Super-Earth"
    elif 1.5 < radius <= 2.5 and 5 < mass <= 10 and 370 < temp <= 800:
        return "Hot Super-Earth"
    elif 2.5 < radius <= 4 and 10 < mass <= 50 and 300 < temp <= 1000:
        return "Mini-Neptune"
    elif 4 < radius <= 6 and 50 < mass <= 100 and 500 < temp <= 1500:
        return "Neptune-like"
    elif radius > 6 and mass > 100 and temp > 800:
        return "Gas Giant"
    else:
        return "Unknown"


In [16]:
nasa['pl_type'] = nasa.apply(lambda row: classify_planet(row['pl_rade'], row['pl_bmasse'], row['pl_eqt']), axis=1)
display(nasa)

print(nasa['pl_type'].value_counts())

Unnamed: 0,pl_name,hostname,pl_orbper,pl_orbsmax,pl_orbeccen,pl_rade,pl_bmasse,pl_masse,pl_dens,pl_insol,...,st_teff,st_rad,st_mass,st_lum,st_met,st_logg,sy_dist,discoverymethod,disc_year,pl_type
0,11 Com b,11 Com,323.210000,1.17800,0.2380,,4914.89849,,,,...,4874.00,13.76,2.09,1.978,-0.260,2.45,93.1846,Radial Velocity,2007,Unknown
1,11 UMi b,11 UMi,516.219970,1.53000,0.0800,,4684.81420,,,,...,4213.00,29.79,2.78,,-0.020,1.93,125.3210,Radial Velocity,2009,Unknown
2,14 And b,14 And,186.760000,0.77500,0.0000,,1131.15130,,,,...,4888.00,11.55,1.78,1.840,-0.210,2.55,75.4392,Radial Velocity,2008,Unknown
3,14 Her b,14 Her,1765.038900,2.77400,0.3730,,2559.47216,2559.47216,,,...,5314.94,1.00,0.91,-0.153,0.405,4.43,17.9323,Radial Velocity,2002,Unknown
4,16 Cyg B b,16 Cyg B,799.450000,1.67600,0.6832,,556.83537,,,,...,5711.97,1.16,0.98,0.097,0.074,4.30,21.1397,Radial Velocity,1996,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5798,ups And b,ups And,4.617122,0.05914,0.0069,,214.53417,,,,...,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1996,Unknown
5799,ups And c,ups And,241.223000,0.82650,0.2660,,624.53282,4443.24113,,,...,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999,Unknown
5800,ups And d,ups And,1282.410000,2.51700,0.2940,,1303.09647,3257.74117,,,...,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999,Unknown
5801,ups Leo b,ups Leo,385.200000,1.18000,0.3200,,162.09249,,,,...,4836.00,11.22,1.48,1.800,-0.200,2.46,52.5973,Radial Velocity,2021,Unknown


pl_type
Unknown            5110
Gas Giant           535
Mini-Neptune        105
Hot Super-Earth      42
Earth-like            6
Neptune-like          5
Name: count, dtype: int64
