In [20]:
import pandas as pd
import numpy as np
import duckdb as db
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# This file was produced by the NASA Exoplanet Archive  http://exoplanetarchive.ipac.caltech.edu
# Mon Mar 10 14:42:21 2025
#
# COLUMN pl_name:        Planet Name
# COLUMN hostname:       Host Name
# COLUMN default_flag:   Default Parameter Set
# COLUMN sy_snum:        Number of Stars
# COLUMN sy_pnum:        Number of Planets
# COLUMN discoverymethod: Discovery Method
# COLUMN disc_year:      Discovery Year
# COLUMN disc_facility:  Discovery Facility
# COLUMN soltype:        Solution Type
# COLUMN pl_controv_flag: Controversial Flag
# COLUMN pl_refname:     Planetary Parameter Reference
# COLUMN pl_orbper:      Orbital Period [days]
# COLUMN pl_orbpererr1:  Orbital Period Upper Unc. [days]
# COLUMN pl_orbpererr2:  Orbital Period Lower Unc. [days]
# COLUMN pl_orbperlim:   Orbital Period Limit Flag
# COLUMN pl_orbsmax:     Orbit Semi-Major Axis [au]
# COLUMN pl_orbsmaxerr1: Orbit Semi-Major Axis Upper Unc. [au]
# COLUMN pl_orbsmaxerr2: Orbit Semi-Major Axis Lower Unc. [au]
# COLUMN pl_orbsmaxlim:  Orbit Semi-Major Axis Limit Flag
# COLUMN pl_rade:        Planet Radius [Earth Radius]
# COLUMN pl_radeerr1:    Planet Radius Upper Unc. [Earth Radius]
# COLUMN pl_radeerr2:    Planet Radius Lower Unc. [Earth Radius]
# COLUMN pl_radelim:     Planet Radius Limit Flag
# COLUMN pl_radj:        Planet Radius [Jupiter Radius]
# COLUMN pl_radjerr1:    Planet Radius Upper Unc. [Jupiter Radius]
# COLUMN pl_radjerr2:    Planet Radius Lower Unc. [Jupiter Radius]
# COLUMN pl_radjlim:     Planet Radius Limit Flag
# COLUMN pl_bmasse:      Planet Mass or Mass*sin(i) [Earth Mass]
# COLUMN pl_bmasseerr1:  Planet Mass or Mass*sin(i) [Earth Mass] Upper Unc.
# COLUMN pl_bmasseerr2:  Planet Mass or Mass*sin(i) [Earth Mass] Lower Unc.
# COLUMN pl_bmasselim:   Planet Mass or Mass*sin(i) [Earth Mass] Limit Flag
# COLUMN pl_bmassj:      Planet Mass or Mass*sin(i) [Jupiter Mass]
# COLUMN pl_bmassjerr1:  Planet Mass or Mass*sin(i) [Jupiter Mass] Upper Unc.
# COLUMN pl_bmassjerr2:  Planet Mass or Mass*sin(i) [Jupiter Mass] Lower Unc.
# COLUMN pl_bmassjlim:   Planet Mass or Mass*sin(i) [Jupiter Mass] Limit Flag
# COLUMN pl_bmassprov:   Planet Mass or Mass*sin(i) Provenance
# COLUMN pl_orbeccen:    Eccentricity
# COLUMN pl_orbeccenerr1: Eccentricity Upper Unc.
# COLUMN pl_orbeccenerr2: Eccentricity Lower Unc.
# COLUMN pl_orbeccenlim: Eccentricity Limit Flag
# COLUMN pl_insol:       Insolation Flux [Earth Flux]
# COLUMN pl_insolerr1:   Insolation Flux Upper Unc. [Earth Flux]
# COLUMN pl_insolerr2:   Insolation Flux Lower Unc. [Earth Flux]
# COLUMN pl_insollim:    Insolation Flux Limit Flag
# COLUMN pl_eqt:         Equilibrium Temperature [K]
# COLUMN pl_eqterr1:     Equilibrium Temperature Upper Unc. [K]
# COLUMN pl_eqterr2:     Equilibrium Temperature Lower Unc. [K]
# COLUMN pl_eqtlim:      Equilibrium Temperature Limit Flag
# COLUMN ttv_flag:       Data show Transit Timing Variations
# COLUMN st_refname:     Stellar Parameter Reference
# COLUMN st_spectype:    Spectral Type
# COLUMN st_teff:        Stellar Effective Temperature [K]
# COLUMN st_tefferr1:    Stellar Effective Temperature Upper Unc. [K]
# COLUMN st_tefferr2:    Stellar Effective Temperature Lower Unc. [K]
# COLUMN st_tefflim:     Stellar Effective Temperature Limit Flag
# COLUMN st_rad:         Stellar Radius [Solar Radius]
# COLUMN st_raderr1:     Stellar Radius Upper Unc. [Solar Radius]
# COLUMN st_raderr2:     Stellar Radius Lower Unc. [Solar Radius]
# COLUMN st_radlim:      Stellar Radius Limit Flag
# COLUMN st_mass:        Stellar Mass [Solar mass]
# COLUMN st_masserr1:    Stellar Mass Upper Unc. [Solar mass]
# COLUMN st_masserr2:    Stellar Mass Lower Unc. [Solar mass]
# COLUMN st_masslim:     Stellar Mass Limit Flag
# COLUMN st_met:         Stellar Metallicity [dex]
# COLUMN st_meterr1:     Stellar Metallicity Upper Unc. [dex]
# COLUMN st_meterr2:     Stellar Metallicity Lower Unc. [dex]
# COLUMN st_metlim:      Stellar Metallicity Limit Flag
# COLUMN st_metratio:    Stellar Metallicity Ratio
# COLUMN st_logg:        Stellar Surface Gravity [log10(cm/s**2)]
# COLUMN st_loggerr1:    Stellar Surface Gravity Upper Unc. [log10(cm/s**2)]
# COLUMN st_loggerr2:    Stellar Surface Gravity Lower Unc. [log10(cm/s**2)]
# COLUMN st_logglim:     Stellar Surface Gravity Limit Flag
# COLUMN sy_refname:     System Parameter Reference
# COLUMN rastr:          RA [sexagesimal]
# COLUMN ra:             RA [deg]
# COLUMN decstr:         Dec [sexagesimal]
# COLUMN dec:            Dec [deg]
# COLUMN sy_dist:        Distance [pc]
# COLUMN sy_disterr1:    Distance [pc] Upper Unc
# COLUMN sy_disterr2:    Distance [pc] Lower Unc
# COLUMN sy_vmag:        V (Johnson) Magnitude
# COLUMN sy_vmagerr1:    V (Johnson) Magnitude Upper Unc
# COLUMN sy_vmagerr2:    V (Johnson) Magnitude Lower Unc
# COLUMN sy_kmag:        Ks (2MASS) Magnitude
# COLUMN sy_kmagerr1:    Ks (2MASS) Magnitude Upper Unc
# COLUMN sy_kmagerr2:    Ks (2MASS) Magnitude Lower Unc
# COLUMN sy_gaiamag:     Gaia Magnitude
# COLUMN sy_gaiamagerr1: Gaia Magnitude Upper Unc
# COLUMN sy_gaiamagerr2: Gaia Magnitude Lower Unc
# COLUMN rowupdate:      Date of Last Update
# COLUMN pl_pubdate:     Planetary Parameter Reference Publication Date
# COLUMN releasedate:    Release Date
#

In [None]:
# all the headers for open exoplanet catalogue data

#name,binaryflag,mass,radius,period,semimajoraxis,eccentricity,periastron,longitude,ascendingnode,inclination,temperature,age,discoverymethod,discoveryyear,lastupdate,system_rightascension,system_declination,system_distance,hoststar_mass,hoststar_radius,hoststar_metallicity,hoststar_temperature,hoststar_age,list


In [4]:
data = pd.read_csv('ExoplanetData.csv')
data2 = pd.read_csv('open_exoplanet_catalogue.txt')

In [19]:
nameData = data[['pl_name', 'st_spectype']]
nameData2 = data2[['name', 'temperature']]

print(nameData.head())
#nameData2.head()

stypes = data['st_spectype'].unique()
print(stypes)

distance = data[['pl_name', 'sy_dist']].dropna()
distance = distance.sort_values(by='sy_dist', ascending=False)
# plt.figure(figsize=(20,10))
# plt.scatter(distance['pl_name'], distance['sy_dist'])

# plt.xticks([], [])
# plt.xlabel("Exoplanets")
# plt.ylabel("Distance from Earth (parsecs)")
# plt.title("Distance of Exoplanets from Earth")

# plt.show()


    pl_name st_spectype
0  11 Com b      G8 III
1  11 Com b         NaN
2  11 Com b      G8 III
3  11 UMi b         NaN
4  11 UMi b      K4 III
['G8 III' nan 'K4 III' 'K0 III' 'K0 V' 'G2.5 V' 'G3 V' 'K3 III' 'G6 III'
 'K7 V' 'G3 IV' 'M V' 'M5.5/M6' 'M3.5 V' 'M6' 'M6 V' 'M8.5' 'M8' 'F6 V'
 'K1 III' 'K1.5 III' 'G1 V' 'G0 V' 'F0 IV' 'K0 IV-V' 'G8 V' 'K0 IV' 'G5 V'
 'G3 III' 'K0' 'G5 III' 'K2 V' 'F8 V' 'M1' 'K2 II' 'K5 III' 'G' 'K1' 'K3'
 'K7/M0 V' 'K3 V' 'K5 V' 'M3.5-4 V' 'M5.0 V' 'M9-L1' 'M3.5' 'M3 V' 'K1 V'
 'F9 V' 'G2 V' 'G9 V' 'F8 IV' 'F9' 'G5' 'G2' 'G8/9 IV' 'F3 V' 'G2 IV'
 'G0 VI' 'L1.5' 'M0.5' 'F5 V' 'F5-F8 V' 'G6 V' 'G0' 'G8 IV/V' 'M7.25'
 'M4.0 V' 'M2.5 V' 'M5.5 V' 'M4.5 V' 'M 4.5V' 'M4 V' 'M3' 'K4.5' 'M2 V'
 'M0 V' 'M1/M2 V' 'M3.0 V' 'M0.5 V' 'M0' 'M0.0' 'M2.5' 'M4' 'M0.0 V'
 'M1.5' 'M1.0 V' 'M2.0 V' 'M1.9' 'M1.5 V' 'M4.0' 'M3.5 Ve' 'M0.5-1.0 V'
 'M1 V' 'M2' 'M1.0' 'M2/3 V' 'K7' 'K6 V' 'F2' 'K7e V' 'K4' 'G4' 'F' 'F8'
 'K' 'K2' 'G3' 'G8' 'F4' 'F3' 'A8 V' 'G V' 'K V' 'F V' 'B9 Vne

In [23]:
# Define a color palette for broad spectral types
spectral_palette = {
    'O': 'blue', 'B': 'royalblue', 'A': 'cyan', 'F': 'green', 
    'G': 'yellow', 'K': 'orange', 'M': 'red', 'L': 'purple',
    'T': 'brown', 'Y': 'pink', 'D': 'gray'  # White Dwarfs (D)
}

# Select two properties for the scatter plot (e.g., Planet Radius vs. Orbital Period)
x_col = "pl_rade"  # Planet Radius in Earth radii
y_col = "pl_orbper"  # Orbital Period in days

# Filter out NaNs
df_filtered = data[[x_col, y_col, "st_spectype"]].dropna()

# Create scatter plot
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=df_filtered, 
    x=x_col, 
    y=y_col, 
    hue=df_filtered["st_spectype"], 
    palette=spectral_palette,
    alpha=0.7  # Transparency for better visibility
)

plt.xscale("log")  # Optional: Log scale if values span large ranges
plt.yscale("log")
plt.xlabel("Planet Radius (Earth Radii)")
plt.ylabel("Orbital Period (days)")
plt.title("Exoplanets: Orbital Period vs. Planet Radius, Colored by Spectral Type")
plt.legend(title="Spectral Type", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, which="both", linestyle="--", alpha=0.5)
plt.show()

ValueError: The palette dictionary is missing keys: {'A5 V', 'K0', 'K4.5', 'G5V', 'K1.5+/-1', 'F4', 'M0.5 V', 'M2', 'K6', 'K3', 'G2 V', 'M1.0 V', 'M V', 'G6 V', 'M1.0', 'M(6.1 +/- 0.7) V', 'M1', 'M0', 'M4.0', 'K1', 'G8 V', 'F9-F9.5', 'G2 IV', 'G8', 'M3.0 V', 'G3', 'K5', 'M6 V', 'G1', 'G9', 'G4 V', 'G2', 'F8/G0 V', 'G9/K0', 'K4 V', 'G8/9 IV', 'M2.0 V', 'K0 V', 'M2.65 V', 'K0 IV', 'M4.5', 'F6 IV', 'M6.5+/-0.5', 'G9 IV', 'G3 V', 'A8', 'F6', 'F7/F8V', 'G8/G9', 'K0.5 V', 'M1.5', 'K0.0 V', 'M0.5+/-0.5 V', 'G1 IV', 'M5V', 'G V', 'A7 V', 'M0.0', 'M3+/-1', 'F8 V', 'F2', 'K2', 'F8', 'K0-2 V', 'K V', 'K5.5', 'M0.0 V', 'K2.5 V', 'K8 V', 'M2.5 V', 'F2 V', 'M9-L1', 'G9 V', 'M4', 'G7 V', 'F7 IV', 'K1-K2', 'G7', 'K7 V', 'M3', 'M4 &#177 0.5 V', 'F9 V', 'M2 V', 'M3.5+/-0.5', 'K7', 'G6', 'F1 V', 'K2V', 'Am C', 'G0/F9 V', 'M4.5+/-0.5', 'M3.5 +/- 1.0', 'F6 V', 'M4.5 V', 'M2+/-0.5', 'm3 V', 'M1 V', 'M5 V', 'M1.0+/-0.5', 'K9 V/M0 V', 'A8 V', 'A1 IV-V', 'M0 V', 'B9.5-A0', 'F6 IV-V', 'F0', 'K4.5 V', 'M3 V', 'K7.5 V', 'F5V', 'M(6.1+/-0.7) V', 'M5.0+0.5-0.5', 'F5 V', 'G0 V', 'M5.0', 'G0', 'M1.5 V', 'G5', 'F7 V', 'M4 V', 'F9', 'A2', 'K4', 'K1 V', 'G8 IV/V', 'M4+/-0.5', 'K2 V', 'F9/G0', 'G5 V', 'G0 VI', 'F9 IV', 'F5 IV', 'K5 V', 'K1/2 V', 'M(3.5+/-0.5) V', 'K7-M0 V', 'M3.0', 'K3.5 V', 'F V', 'M3.5', 'F3 V', 'M5.5+/-0.5', 'M4.5V-M5V', 'G0 IV', 'M0.5-1.0 V', 'M3.3', 'A2 V', 'G7 IV/V', 'G1 V', 'M3.5 V', 'M0.5', 'K6-K7', 'F8 IV', 'F7', 'G4', 'F5', 'G0 IV-V', 'F4 V', 'G3 IV', 'M5', 'K6 V', 'K3 V'}

<Figure size 1000x600 with 0 Axes>