In [3]:
import pandas as pd
import numpy as np

# Load the cleaned planet table; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("../data/clean_planet_data.csv")
# Preview the first rows as this cell's displayed output.
df.head()



Unnamed: 0,Planet Name,Planet Radius (Earth),Planet Mass (Earth),Orbital Distance (AU),Star Temperature (K),Habitability Label
0,1RXS J160929.1-210524 b,18.647,2543.0,330.0,4060.0,0
1,2MASS J02192210-3925225 b,16.14096,4417.837,156.0,3064.0,0
2,55 Cnc e,1.91,8.08,0.01544,5250.0,0
3,55 Cnc e,1.92,8.08,0.01544,5250.0,0
4,55 Cnc e,2.173,8.37,0.01583,5250.0,0


In [4]:
# Summary statistics, restricted to the four physical feature columns.
summary_columns = [
    "Planet Radius (Earth)",
    "Planet Mass (Earth)",
    "Orbital Distance (AU)",
    "Star Temperature (K)",
]
df.describe().loc[:, summary_columns]

Unnamed: 0,Planet Radius (Earth),Planet Mass (Earth),Orbital Distance (AU),Star Temperature (K)
count,845.0,845.0,845.0,845.0
mean,8.866735,386.127065,11.971057,5552.860391
std,6.458727,838.671455,264.768281,1402.837775
min,0.46,0.07,0.0058,2320.0
25%,2.53,8.82,0.04,5131.0
50%,10.0881,117.5971,0.0529,5627.0
75%,14.022459,365.5045,0.09309,6040.0
max,77.3421,8654.15,7506.0,27730.0


In [5]:
def describe_radius(r):
    """Map a planet radius (in Earth radii) to a short size phrase.

    Missing values (anything ``pd.isna`` flags) yield "unknown size".
    Otherwise the first band whose exclusive upper bound exceeds ``r`` is
    used; radii of 10 or more fall through to the gas-giant phrase.
    """
    if pd.isna(r):
        return "unknown size"

    # (exclusive upper bound, phrase) pairs in ascending order.
    size_bands = (
        (0.5, "smaller than Earth"),
        (1.5, "similar in size to Earth"),
        (3, "a super-Earth or mini-Neptune"),
        (10, "a Neptune-sized planet"),
    )
    for upper_bound, phrase in size_bands:
        if r < upper_bound:
            return phrase
    return "a gas giant much larger than Earth"

In [6]:
def describe_mass(m):
    """Map a planet mass (in Earth masses) to a short qualitative phrase.

    Parameters
    ----------
    m : float or missing
        Planet mass in Earth masses. Anything ``pd.isna`` flags is treated
        as missing.

    Returns
    -------
    str
        "unknown mass" for missing input, otherwise the phrase for the first
        band whose exclusive upper bound exceeds ``m``.
    """
    if pd.isna(m):
        return "unknown mass"
    if m < 0.1:
        return "much less massive than Earth"
    elif m < 0.5:
        # Sub-Earth masses are not "a few times Earth's mass".
        return "less massive than Earth"
    elif m < 2:
        # Roughly Earth-mass planets get their own band, mirroring the
        # "similar in size to Earth" band used for radii.
        return "similar in mass to Earth"
    elif m < 5:
        return "a few times Earth's mass"
    elif m < 20:
        return "significantly more massive than Earth"
    elif m < 100:
        return "a very massive planet"
    else:
        return "an extremely massive planet"

In [7]:
def describe_orbit(a):
    """Turn an orbital distance (in AU) into a phrase about the orbit.

    Missing values (per ``pd.isna``) produce the "unknown distance" phrase.
    The bands are tested from the widest orbit downward, so each check only
    needs a lower bound.
    """
    if pd.isna(a):
        return "at an unknown distance from its star"
    if a >= 10:
        return "in a very distant orbit far from its star"
    if a >= 2:
        return "in a relatively wide orbit"
    if a >= 0.5:
        return "at a distance comparable to Earth's orbit"
    if a >= 0.1:
        return "in a close-in orbit around its star"
    return "very close to its star"

In [8]:
def describe_star_temp(t):
    """Map a stellar effective temperature (in kelvin) to a host-star phrase.

    Parameters
    ----------
    t : float or missing
        Effective temperature of the host star in K. Anything ``pd.isna``
        flags is treated as missing.

    Returns
    -------
    str
        A phrase beginning with "around ..." for use in a sentence about
        the planet's orbit.
    """
    if pd.isna(t):
        return "around a star of unknown temperature"
    if t < 3500:
        return "around a cool, red star"
    elif t < 5200:
        # Cooler than the Sun: this range is K-type, not Sun-like.
        return "around a relatively cool, K-type star"
    elif t < 6000:
        # G-type range, which contains the Sun (about 5772 K); the previous
        # bands labelled these stars "F-type".
        return "around a Sun-like, G-type star"
    elif t < 7000:
        return "around a warmer, F-type star"
    else:
        return "around a very hot, blue star"

In [9]:
def _format_orbital_distance(a):
    """Format an orbital distance in AU without rounding close-in orbits away.

    Distances of at least 1 AU keep two decimals. Smaller distances keep
    three significant digits, so 0.0058 AU is not printed as "0.01".
    """
    return f"{a:.2f}" if abs(a) >= 1 else f"{a:.3g}"


def generate_planet_description(row):
    """Build a two-sentence English description of one planet.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``df.apply(axis=1)``)
        Must provide "Planet Radius (Earth)", "Planet Mass (Earth)",
        "Orbital Distance (AU)" and "Star Temperature (K)". "Planet Name" is
        optional; "This planet" is used when it is absent or missing.

    Returns
    -------
    str
        A qualitative sentence built from the ``describe_*`` helpers,
        followed by a sentence with the numeric values. Missing values are
        reported as unknown rather than rendered as "nan".
    """
    name = row.get("Planet Name", "This planet")
    if pd.isna(name):
        # A present-but-missing name would otherwise be rendered as "nan".
        name = "This planet"
    r = row["Planet Radius (Earth)"]
    m = row["Planet Mass (Earth)"]
    a = row["Orbital Distance (AU)"]
    t = row["Star Temperature (K)"]

    radius_phrase = describe_radius(r)
    mass_phrase = describe_mass(m)
    orbit_phrase = describe_orbit(a)
    star_phrase = describe_star_temp(t)

    # NOTE(review): some helper phrases are adjectival (e.g. "significantly
    # more massive than Earth"), so "with {mass_phrase}" can read awkwardly.
    # The template is kept as-is so existing descriptions stay stable.
    desc = (
        f"{name} is {radius_phrase} with {mass_phrase}. "
        f"It orbits {orbit_phrase} {star_phrase}."
    )

    # Each numeric clause degrades to "unknown" when the value is missing,
    # matching how the describe_* helpers treat missing input. Formatting
    # None with a float spec would otherwise raise TypeError.
    if pd.isna(r):
        radius_clause = "an unknown radius"
    else:
        radius_clause = f"a radius of {r:.2f} Earth radii"

    if pd.isna(m):
        mass_clause = "an unknown mass"
    else:
        mass_clause = f"a mass of {m:.2f} Earth masses"

    if pd.isna(a):
        orbit_clause = "an unknown orbital distance"
    else:
        orbit_clause = f"an orbital distance of {_format_orbital_distance(a)} AU"

    if pd.isna(t):
        temp_clause = "is unknown"
    else:
        temp_clause = f"is {t:.0f} K"

    desc += (
        f" Numerically, it has {radius_clause}, "
        f"{mass_clause}, {orbit_clause}, "
        f"and its star's effective temperature {temp_clause}."
    )

    return desc

# Build one description per row (axis=1 hands each row to the function) and
# store the result in a new "Text Description" column on df.
df["Text Description"] = df.apply(generate_planet_description, axis=1)
# Display the first five names next to their generated descriptions.
df[["Planet Name", "Text Description"]].head(5)

Unnamed: 0,Planet Name,Text Description
0,1RXS J160929.1-210524 b,1RXS J160929.1-210524 b is a gas giant much la...
1,2MASS J02192210-3925225 b,2MASS J02192210-3925225 b is a gas giant much ...
2,55 Cnc e,55 Cnc e is a super-Earth or mini-Neptune with...
3,55 Cnc e,55 Cnc e is a super-Earth or mini-Neptune with...
4,55 Cnc e,55 Cnc e is a super-Earth or mini-Neptune with...


In [10]:
# Sanity check: count rows whose generated description is missing.
df["Text Description"].isna().sum()

np.int64(0)

In [11]:
# Spot-check ten rows; the fixed random_state makes the draw repeatable.
df[["Planet Name", "Text Description"]].sample(10, random_state=42)

Unnamed: 0,Planet Name,Text Description
493,HD 221416 b,HD 221416 b is a Neptune-sized planet with a v...
215,HAT-P-23 b,HAT-P-23 b is a gas giant much larger than Ear...
622,K2-18 b,K2-18 b is a super-Earth or mini-Neptune with ...
738,KELT-19 A b,KELT-19 A b is a gas giant much larger than Ea...
63,CoRoT-26 b,CoRoT-26 b is a gas giant much larger than Ear...
247,HAT-P-34 b,HAT-P-34 b is a gas giant much larger than Ear...
798,Kepler-107 e,Kepler-107 e is a super-Earth or mini-Neptune ...
66,CoRoT-28 b,CoRoT-28 b is a gas giant much larger than Ear...
478,HD 209458 b,HD 209458 b is a gas giant much larger than Ea...
30,CoRoT-13 b,CoRoT-13 b is a Neptune-sized planet with an e...


In [12]:
# Write the table, including the new description column, to CSV;
# index=False leaves the row index out of the file.
df.to_csv("../data/final_planet_dataset.csv", index=False)
# Display (rows, columns) as a final size check.
df.shape

(845, 7)