In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read the exoplanet table that the rest of this cell works on.
df = pd.read_csv("exoplanet_scores.csv")

# **Step 1: Normalize necessary columns**
# Every listed column that is present in the frame gets a companion column
# "<column>_normalized" holding (value - mean) / std; absent columns are skipped.
columns_to_normalize = ["pl_eqt", "st_met", "surface_gravity"]
for col in columns_to_normalize:
    if col not in df.columns:
        continue
    mean_value = df[col].mean(skipna=True)
    std_value = df[col].std(skipna=True)
    # A spread of exactly zero would divide by zero, so fall back to 1.
    divisor = 1 if std_value == 0 else std_value
    centered = df[col] - mean_value
    df[f"{col}_normalized"] = centered / divisor

# **Step 2: Calculate Scores**
def _logistic(z):
    """Return the logistic function 1 / (1 + exp(-z)) of a scalar or Series."""
    return 1 / (1 + np.exp(-z))


# Habitability: weighted sum of a radius term, a temperature term and the
# normalized stellar metallicity, rounded to the nearest whole number.
radius_term = _logistic(df["pl_rade"] - 2) * 50
temperature_term = _logistic(df["pl_eqt"] / 300) * 30  # Use pl_eqt directly
metallicity_term = df["st_met_normalized"] * 10  # Reduce impact of metallicity
df["Habitability Score"] = (radius_term + temperature_term + metallicity_term).round()

# Terraformability: weighted sum of a mass term, an orbital-period term and
# the normalized surface gravity, rounded the same way.
mass_term = _logistic((df["pl_bmasse"] - 5) / 5) * 40  # Adjust scaling
period_term = _logistic((df["pl_orbper"] - 100) / 50) * 30  # Adjust scaling
gravity_term = df["surface_gravity_normalized"] * 20  # Reduce weight of gravity
df["Terraformability Score"] = (mass_term + period_term + gravity_term).round()

# **Step 3: Prepare for Model Training**
features = ["pl_rade", "pl_bmasse", "pl_eqt_normalized", "st_met_normalized", "surface_gravity_normalized"]
target_habitability = "Habitability Score"
target_terraformability = "Terraformability Score"
score_columns = [target_habitability, target_terraformability]

# Training only uses rows where every feature and both scores are present;
# the full frame `df` keeps its incomplete rows.
df_train = df.dropna(subset=features + score_columns)

# **Ensure enough data exists before training**
if len(df_train) >= 5:
    # Hold out 20% of the complete rows for evaluation (fixed seed).
    X_train, X_test, y_train, y_test = train_test_split(
        df_train[features],
        df_train[score_columns],
        test_size=0.2,
        random_state=42,
    )

    # **Step 4: Train Model** (one forest predicting both scores at once)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # **Step 5: Evaluate Model** on the held-out rows
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    print(f"📏 Mean Absolute Error: {mae:.2f}")
    print(f"📉 Mean Squared Error: {mse:.2f}")

    # **Step 6: Apply Model to Full Dataset**
    # Every row with complete features has both score columns overwritten by
    # the model's predictions, including rows the model was trained on.
    df_pred_indices = df.dropna(subset=features).index  # Find valid indices
    predicted_scores = model.predict(df.loc[df_pred_indices, features])
    df.loc[df_pred_indices, score_columns] = predicted_scores
else:
    print("⚠️ Not enough data for training. Skipping model training.")


# Remove the lowercase score columns so only the "Habitability Score" /
# "Terraformability Score" columns remain.
# NOTE(review): presumably these are older scores shipped in the input CSV - confirm.
superseded_columns = ["habitability_score", "terraforming_score"]
df = df.drop(superseded_columns, axis=1)

# Save the updated dataset (without the index column)
output_path = "exoplanet_scores_cleaned.csv"
df.to_csv(output_path, index=False)
print("✅ Updated dataset saved with predicted scores.")
df


📏 Mean Absolute Error: 2.19
📉 Mean Squared Error: 283.96
✅ Updated dataset saved with predicted scores.


Unnamed: 0,pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met,pl_eqt_normalized,st_met_normalized,surface_gravity,surface_gravity_normalized,Habitability Score,Terraformability Score
0,OGLE-2016-BLG-1227L b,13.900,250.00000,,912.530715,5412.39249,0.10,,0.014263,0.000000,-3.370912e-16,1.293929,-0.153474,79.07,46.1100
1,Kepler-24 e,2.780,8.15000,18.998355,792.000000,5897.00000,1.05,1.29,-0.071000,-0.302064,-4.733796e-01,1.054552,-0.167278,57.78,27.8100
2,Kepler-1065 b,3.730,13.40000,3.609309,1092.000000,5635.00000,0.94,0.93,-0.010000,0.449771,-1.347079e-01,0.963135,-0.172550,69.90,34.4100
3,HD 132406 b,12.800,1887.90074,908.000000,912.530715,5766.00000,0.97,1.34,0.129000,0.000000,6.370194e-01,11.522832,0.436389,85.01,77.3700
4,TOI-1260 c,2.760,13.20000,7.493134,651.000000,4227.00000,0.68,0.67,-0.100000,-0.655426,-6.343875e-01,1.732829,-0.128165,55.00,35.3200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5851,GJ 229 b,3.970,14.93794,579.474950,912.530715,3564.00000,0.51,0.46,0.014263,0.000000,-3.370912e-16,0.947785,-0.173435,72.77,52.5900
5852,GJ 229 A c,2.870,8.58137,121.932680,912.530715,3912.54000,0.51,0.46,0.014263,0.000000,-3.370912e-16,1.041820,-0.168013,64.00,36.7500
5853,Kepler-974 b,1.570,3.09000,4.194497,577.000000,3687.00000,0.52,0.50,0.070000,-0.840878,3.094517e-01,1.253601,-0.155800,48.99,17.4700
5854,KOI-1843.03,0.610,8.00000,0.176891,1654.000000,3584.00000,0.46,0.45,0.000000,1.858207,-7.918795e-02,21.499597,1.011712,41.59,43.2900


In [2]:
# Code to estimate atmosphere presence in exoplanets

In [3]:
import pandas as pd
import numpy as np

# Load dataset
# Reads "exoplanet_scores_cleaned.csv" (the file name the first cell writes
# with to_csv) and rebinds `df` to the freshly loaded frame.
df = pd.read_csv("exoplanet_scores_cleaned.csv")

# Define the sigmoid function
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)).

    Works element-wise on anything ``np.exp`` accepts (scalars, NumPy
    arrays, pandas Series); the result lies between 0 and 1.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Estimate stellar activity (st_activity) based on st_teff
def estimate_st_activity(st_teff):
    """Map a stellar effective temperature to an activity value in [0, 1].

    The value falls linearly from 1 at 4000 to 0 at 8000 and is clamped
    outside that range. A missing temperature yields 0.5 (moderate activity).
    """
    if pd.isna(st_teff):
        return 0.5  # No temperature data: assume moderate activity

    activity = 1 - (st_teff - 4000) / 4000
    # Clamp to the closed interval [0, 1].
    if activity >= 1:
        return 1
    if activity <= 0:
        return 0
    return activity

# Apply function to create a new column
df["st_activity"] = df["st_teff"].apply(estimate_st_activity)

# Estimate atmosphere presence probability
# Logistic of a weighted sum: mass and surface gravity raise the value,
# equilibrium temperature above 255 and stellar activity lower it.
df["pl_atmos"] = sigmoid(
    0.5 * (df["pl_bmasse"] / 10) +   # Mass effect
    0.3 * df["surface_gravity"] -    # Gravity effect
    0.4 * ((df["pl_eqt"] - 255) / 100) -  # Temperature effect
    0.2 * df["st_activity"]          # Stellar activity effect
)

# 🔥 1. Surface Temperature Estimation (pl_surf_temp)
# Equilibrium temperature scaled up by at most 10%, in proportion to pl_atmos.
df["pl_surf_temp"] = df["pl_eqt"] * (1 + 0.1 * df["pl_atmos"])  # Greenhouse warming effect

# 🌀 2. Escape Velocity (pl_escape_vel)
df["pl_escape_vel"] = 11.2 * np.sqrt(df["pl_bmasse"] / df["pl_rade"])  # In km/s

# ☀️ 3. Stellar Radiation Flux (pl_radiation_flux)
# NOTE(review): flux scales as L / a^2 and Kepler's third law gives
# a ~ P^(2/3), i.e. a^2 ~ P^(4/3). The exponent 2/3 and the units of
# pl_orbper should be confirmed if this is meant to be in Earth-flux units,
# as the later `abs(pl_radiation_flux - 1)` terms suggest.
# Missing values (from a missing st_rad or pl_orbper) are filled with 0.
# The result is assigned back instead of calling fillna(..., inplace=True)
# on the column selection, which warns today and has no effect in pandas 3.0.
df["pl_radiation_flux"] = (
    ((df["st_teff"] / 5778) ** 4) * (df["st_rad"] ** 2) / (df["pl_orbper"] ** (2/3))
).fillna(0)


# 🌍 4. Earth Similarity Index (ESI)
def calculate_esi(r, v, t):
    """Return the product of three similarity terms for radius, velocity and temperature.

    Each term is ``(1 - |(x - ref) / (x + ref)|) ** weight`` with
    (ref, weight) = (1, 0.57) for ``r``, (11.2, 0.7) for ``v`` and
    (288, 5.58) for ``t``. An input equal to its reference contributes 1.
    """
    def _similarity(value, reference, weight):
        return (1 - abs((value - reference) / (value + reference))) ** weight

    radius_term = _similarity(r, 1, 0.57)
    velocity_term = _similarity(v, 11.2, 0.7)
    temperature_term = _similarity(t, 288, 5.58)  # 288K = Earth's avg. temp
    return radius_term * velocity_term * temperature_term

# calculate_esi consists only of arithmetic, abs() and **, all of which work
# element-wise on Series, so it is called once on whole columns instead of
# once per row via df.apply(..., axis=1). Values match up to float rounding.
df["ESI"] = calculate_esi(df["pl_rade"], df["pl_escape_vel"], df["pl_surf_temp"])

# 🌊 5. Presence of Liquid Water (pl_water_probability)
# 1 at a surface temperature of 288, falling linearly with the relative
# distance from 288 and clipped to the range [0, 1].
df["pl_water_probability"] = 1 - abs((df["pl_surf_temp"] - 288) / 288)
df["pl_water_probability"] = df["pl_water_probability"].clip(0, 1) 

# ✅ Habitability Score Calculation (converted to percentage)
# Overwrites the "Habitability Score" column that was loaded from the CSV.
df["Habitability Score"] = sigmoid(
    0.5 * df["ESI"] + 
    0.4 * df["pl_water_probability"] - 
    0.3 * abs(df["pl_radiation_flux"] - 1)
) * 100  # Convert to percentage

# Convert Terraformability Score to %
# Overwrites the "Terraformability Score" column that was loaded from the CSV.
df["Terraformability Score"] = sigmoid(
    0.5 * df["pl_atmos"] + 
    0.3 * df["pl_escape_vel"] / 15 -  
    0.2 * df["st_activity"] -  
    0.3 * abs(df["pl_radiation_flux"] - 1)
) * 100  # Convert to percentage
# Save updated dataset
df.to_csv("exoplanet_scores_Final.csv", index=False)
print("Updated CSV file saved with Habitability and Terraformability scores in percentage format!")

# Display dataframe (optional)
df


Updated CSV file saved with Habitability and Terraformability scores in percentage format!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["pl_radiation_flux"].fillna(0, inplace=True)  # Fill missing values with 0


Unnamed: 0,pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met,pl_eqt_normalized,...,surface_gravity_normalized,Habitability Score,Terraformability Score,st_activity,pl_atmos,pl_surf_temp,pl_escape_vel,pl_radiation_flux,ESI,pl_water_probability
0,OGLE-2016-BLG-1227L b,13.900,250.00000,,912.530715,5412.39249,0.10,,0.014263,0.000000,...,-0.153474,42.577623,73.508386,0.646902,0.999960,1003.780144,47.498580,0.000000,0.001789,0.000000
1,Kepler-24 e,2.780,8.15000,18.998355,792.000000,5897.00000,1.05,1.29,-0.071000,-0.302064,...,-0.167278,44.618633,53.581554,0.525750,0.178098,806.105394,19.176725,0.253581,0.015668,0.000000
2,Kepler-1065 b,3.730,13.40000,3.609309,1092.000000,5635.00000,0.94,0.93,-0.010000,0.449771,...,-0.172550,45.053726,53.587472,0.591250,0.075347,1100.227930,21.228339,0.332518,0.003489,0.000000
3,HD 132406 b,12.800,1887.90074,908.000000,912.530715,5766.00000,0.97,1.34,0.129000,0.000000,...,0.436389,42.707095,94.344883,0.558500,1.000000,1003.783786,136.019952,0.018991,0.000982,0.000000
4,TOI-1260 c,2.760,13.20000,7.493134,651.000000,4227.00000,0.68,0.67,-0.100000,-0.655426,...,-0.128165,43.154515,54.718152,0.943250,0.355992,674.175104,24.493477,0.033578,0.028754,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5851,GJ 229 b,3.970,14.93794,579.474950,912.530715,3564.00000,0.51,0.46,0.014263,0.000000,...,-0.173435,42.645897,50.140841,1.000000,0.141986,925.487390,21.725417,0.000441,0.007109,0.000000
5852,GJ 229 A c,2.870,8.58137,121.932680,912.530715,3912.54000,0.51,0.46,0.014263,0.000000,...,-0.168013,42.676041,48.575035,1.000000,0.110219,922.588517,19.366689,0.001809,0.008752,0.000000
5853,Kepler-974 b,1.570,3.09000,4.194497,577.000000,3687.00000,0.52,0.50,0.070000,-0.840878,...,-0.155800,43.545388,48.943411,1.000000,0.277395,593.005696,15.712578,0.015937,0.071172,0.000000
5854,KOI-1843.03,0.610,8.00000,0.176891,1654.000000,3584.00000,0.46,0.45,0.000000,1.858207,...,1.011712,43.259542,67.051135,1.000000,0.741519,1776.647258,40.560014,0.095130,0.000383,0.000000


In [4]:
# Print a summary of the final frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5856 entries, 0 to 5855
Data columns (total 22 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   pl_name                     5856 non-null   object 
 1   pl_rade                     5856 non-null   float64
 2   pl_bmasse                   5856 non-null   float64
 3   pl_orbper                   5570 non-null   float64
 4   pl_eqt                      5856 non-null   float64
 5   st_teff                     5856 non-null   float64
 6   st_mass                     5849 non-null   float64
 7   st_rad                      5585 non-null   float64
 8   st_met                      5856 non-null   float64
 9   pl_eqt_normalized           5856 non-null   float64
 10  st_met_normalized           5856 non-null   float64
 11  surface_gravity             5856 non-null   float64
 12  surface_gravity_normalized  5856 non-null   float64
 13  Habitability Score          5856 

In [6]:
# Count the missing values remaining in each column of the final frame.
df.isnull().sum()

pl_name                         0
pl_rade                         0
pl_bmasse                       0
pl_orbper                     286
pl_eqt                          0
st_teff                         0
st_mass                         7
st_rad                        271
st_met                          0
pl_eqt_normalized               0
st_met_normalized               0
surface_gravity                 0
surface_gravity_normalized      0
Habitability Score              0
Terraformability Score          0
st_activity                     0
pl_atmos                        0
pl_surf_temp                    0
pl_escape_vel                   0
pl_radiation_flux               0
ESI                             0
pl_water_probability            0
dtype: int64