## Planet classification
We classify the planets into different categories through threshold defined by NASA

In [9]:
# imports and stuff
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

In [10]:
# Import pre-processed dataset from preproc1 (fully aggregated planets)
nasa = pd.read_csv(
    '../data/nasa_aggregated.csv',
)

display(nasa)
display(nasa.columns.values)

Unnamed: 0.1,Unnamed: 0,pl_name,hostname,pl_orbper,pl_orbsmax,pl_orbeccen,pl_rade,pl_bmasse,pl_masse,pl_dens,...,pl_orbincl,st_teff,st_rad,st_mass,st_lum,st_met,st_logg,sy_dist,discoverymethod,disc_year
0,0,14 Her b,14 Her,1765.038900,2.77400,0.3730,,2559.47216,2559.47216,,...,144.652,5314.94,1.00,0.91,-0.153,0.405,4.43,17.9323,Radial Velocity,2002
1,1,16 Cyg B b,16 Cyg B,799.450000,1.67600,0.6832,,556.83537,,,...,,5711.97,1.16,0.98,0.097,0.074,4.30,21.1397,Radial Velocity,1996
2,2,1RXS J160929.1-210524 b,1RXS J160929.1-210524,,330.00000,,18.647,4000.00000,4000.00000,,...,,4060.00,1.31,0.85,-0.370,,4.00,139.1350,Imaging,2008
3,3,47 UMa b,47 UMa,1076.600000,2.05900,0.0160,,774.86566,,,...,,5829.16,1.14,1.01,0.198,0.026,4.33,13.7967,Radial Velocity,1996
4,4,51 Peg b,51 Peg,4.230797,0.05235,0.0042,,147.47238,146.20180,,...,80.000,5760.76,1.19,1.07,0.136,0.206,4.32,15.4614,Radial Velocity,1995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3659,3659,pi Men c,HD 39091,6.267829,0.06900,0.0770,2.042,3.49611,3.63000,2.97,...,87.310,5870.00,1.10,1.02,0.160,0.050,4.36,18.2702,Transit,2018
3660,3660,tau Boo b,tau Boo,3.312453,0.04869,0.0074,,1366.66215,1891.00000,,...,44.500,6461.32,1.44,1.40,0.505,0.272,4.26,15.6521,Radial Velocity,1996
3661,3661,ups And b,ups And,4.617122,0.05914,0.0069,,214.53417,,,...,,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1996
3662,3662,ups And c,ups And,241.223000,0.82650,0.2660,,624.53282,4443.24113,,...,,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999


array(['Unnamed: 0', 'pl_name', 'hostname', 'pl_orbper', 'pl_orbsmax',
       'pl_orbeccen', 'pl_rade', 'pl_bmasse', 'pl_masse', 'pl_dens',
       'pl_insol', 'pl_eqt', 'pl_orbincl', 'st_teff', 'st_rad', 'st_mass',
       'st_lum', 'st_met', 'st_logg', 'sy_dist', 'discoverymethod',
       'disc_year'], dtype=object)

In [11]:
def classify_planet(radius, mass, temp):
    missing_values = sum(x is None for x in [radius, mass, temp])

    # Completely unknown if more than one value is missing
    if missing_values > 1:
        return "Unknown"

    # Partially known if exactly one value is missing
    if missing_values == 1:
        return "Partially Known"

    # Fully known classification
    if radius < 0.5 and mass < 0.5 and 150 <= temp <= 400:
        return "Sub-Earth"
    elif 0.5 <= radius <= 1.5 and 0.5 <= mass <= 5 and 180 <= temp <= 370:
        return "Earth-like"
    elif 1.5 < radius <= 2.5 and 5 < mass <= 10 and 100 <= temp < 180:
        return "Cold Super-Earth"
    elif 1.5 < radius <= 2.5 and 5 < mass <= 10 and 370 < temp <= 800:
        return "Hot Super-Earth"
    elif 2.5 < radius <= 4 and 10 < mass <= 50 and 300 < temp <= 1000:
        return "Mini-Neptune"
    elif 4 < radius <= 6 and 50 < mass <= 100 and 500 < temp <= 1500:
        return "Neptune-like"
    elif radius > 6 and mass > 100 and temp > 800:
        return "Gas Giant"
    else:
        return "Unknown"


In [12]:
nasa['pl_type'] = nasa.apply(lambda row: classify_planet(row['pl_rade'], row['pl_bmasse'], row['pl_eqt']), axis=1)
display(nasa)

print(nasa['pl_type'].value_counts())

Unnamed: 0.1,Unnamed: 0,pl_name,hostname,pl_orbper,pl_orbsmax,pl_orbeccen,pl_rade,pl_bmasse,pl_masse,pl_dens,...,st_teff,st_rad,st_mass,st_lum,st_met,st_logg,sy_dist,discoverymethod,disc_year,pl_type
0,0,14 Her b,14 Her,1765.038900,2.77400,0.3730,,2559.47216,2559.47216,,...,5314.94,1.00,0.91,-0.153,0.405,4.43,17.9323,Radial Velocity,2002,Unknown
1,1,16 Cyg B b,16 Cyg B,799.450000,1.67600,0.6832,,556.83537,,,...,5711.97,1.16,0.98,0.097,0.074,4.30,21.1397,Radial Velocity,1996,Unknown
2,2,1RXS J160929.1-210524 b,1RXS J160929.1-210524,,330.00000,,18.647,4000.00000,4000.00000,,...,4060.00,1.31,0.85,-0.370,,4.00,139.1350,Imaging,2008,Gas Giant
3,3,47 UMa b,47 UMa,1076.600000,2.05900,0.0160,,774.86566,,,...,5829.16,1.14,1.01,0.198,0.026,4.33,13.7967,Radial Velocity,1996,Unknown
4,4,51 Peg b,51 Peg,4.230797,0.05235,0.0042,,147.47238,146.20180,,...,5760.76,1.19,1.07,0.136,0.206,4.32,15.4614,Radial Velocity,1995,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3659,3659,pi Men c,HD 39091,6.267829,0.06900,0.0770,2.042,3.49611,3.63000,2.97,...,5870.00,1.10,1.02,0.160,0.050,4.36,18.2702,Transit,2018,Unknown
3660,3660,tau Boo b,tau Boo,3.312453,0.04869,0.0074,,1366.66215,1891.00000,,...,6461.32,1.44,1.40,0.505,0.272,4.26,15.6521,Radial Velocity,1996,Unknown
3661,3661,ups And b,ups And,4.617122,0.05914,0.0069,,214.53417,,,...,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1996,Unknown
3662,3662,ups And c,ups And,241.223000,0.82650,0.2660,,624.53282,4443.24113,,...,6156.77,1.62,1.29,0.525,0.122,4.13,13.4054,Radial Velocity,1999,Unknown


pl_type
Unknown            3214
Gas Giant           357
Mini-Neptune         64
Hot Super-Earth      22
Earth-like            4
Neptune-like          3
Name: count, dtype: int64
