In [22]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file found anywhere beneath the Kaggle input
# directory, then print each full path on its own line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets/nehapatel8969/exoplanet-raw-data/exoplanet_nasa_raw (1) (1).csv


In [23]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [24]:
# Location of the raw exoplanet CSV inside the Kaggle input mount.
RAW_CSV_PATH = "/kaggle/input/datasets/nehapatel8969/exoplanet-raw-data/exoplanet_nasa_raw (1) (1).csv"

# Read the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(RAW_CSV_PATH)

In [25]:
# Normalise the header names: trim surrounding whitespace, lower-case, and
# replace every space with an underscore so columns can be referenced with
# predictable snake_case keys in the cells below.
df.columns = [
    column_name.strip().lower().replace(" ", "_")
    for column_name in df.columns
]

print("Columns:", list(df.columns))

Columns: ['planet_radius', 'planet_mass', 'orbital_period', 'semi_major_axis', 'equilibrium_temp', 'planet_density', 'star_temp', 'luminosity', 'metallicity', 'star_type']


In [26]:
# Replace the textual `star_type` column with integer codes. The fitted
# encoder is kept so the same text -> code mapping can be applied to new
# samples later.
# NOTE(review): casting to str first means any missing value becomes the
# literal text "nan" and receives its own code -- confirm this is intended.
label_encoder = LabelEncoder()
star_type_text = df["star_type"].astype(str)
df["star_type"] = label_encoder.fit(star_type_text).transform(star_type_text)

In [27]:
# Rule-based target: a row is labelled 1 only when all three inclusive range
# checks below hold, otherwise 0. A missing value fails its range check, so
# such rows are labelled 0.
# NOTE(review): the label is a deterministic function of three columns that
# are also listed in `features`, so a model trained on them is mostly
# re-learning this rule -- confirm that is the intended experiment.
temperature_in_range = df["equilibrium_temp"].between(200, 350)
radius_in_range = df["planet_radius"].between(0.5, 2.0)
orbit_in_range = df["semi_major_axis"].between(0.3, 1.5)

meets_all_criteria = temperature_in_range & radius_in_range & orbit_in_range
df["habitable"] = np.where(meets_all_criteria, 1, 0)

# Show the class balance of the derived label.
print(df["habitable"].value_counts())

habitable
0    347
1    153
Name: count, dtype: int64


In [28]:
# Model inputs, in the column order the fitted preprocessors and the
# classifier will expect for every later sample.
features = [
    # planet properties
    "planet_radius", "planet_mass", "orbital_period",
    "semi_major_axis", "equilibrium_temp", "planet_density",
    # host-star properties
    "star_temp", "luminosity", "metallicity", "star_type",
]

# Target vector (the rule-derived 0/1 label) and the feature matrix.
y = df["habitable"]
X = df.loc[:, features]

In [29]:
imputer = SimpleImputer(strategy="median")
X = imputer.fit_transform(X)

In [30]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [31]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [32]:
# Random-forest hyper-parameters; the fixed random_state makes repeated fits
# on the same data reproducible.
forest_params = {
    "n_estimators": 300,
    "max_depth": 10,
    "random_state": 42,
}
model = RandomForestClassifier(**forest_params)

# Train on the training partition. The call is left as the cell's final
# expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)

In [33]:
# Predict labels for the held-out partition.
y_pred = model.predict(X_test)

# Compute the metrics first, then print them in a fixed order.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n")
print(report_text)

Accuracy: 0.96

Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.97        69
           1       1.00      0.87      0.93        31

    accuracy                           0.96       100
   macro avg       0.97      0.94      0.95       100
weighted avg       0.96      0.96      0.96       100



In [34]:
# Confusion matrix for the held-out predictions: rows are the true labels,
# columns are the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[69  0]
 [ 4 27]]


In [35]:
# A single hand-written sample to score. The keys are listed in the same
# order as the `features` list, and the star type goes through the already
# fitted label encoder so it uses the same integer code as the training data.
star_type_code = label_encoder.transform(["G"])[0]

sample_values = {
    "planet_radius": 1.1,
    "planet_mass": 1.2,
    "orbital_period": 365,
    "semi_major_axis": 1.0,
    "equilibrium_temp": 288,
    "planet_density": 5.5,
    "star_temp": 5778,
    "luminosity": 1.0,
    "metallicity": 0.0,
    "star_type": star_type_code,
}

# One-row DataFrame: one column per feature.
new_planet = pd.DataFrame([sample_values])

In [36]:
# Push the new sample through the already-fitted preprocessors, in the same
# order used for the training data: imputation first, then scaling.
# Only transform() is called, so neither preprocessor is re-fitted here.
new_planet_imputed = imputer.transform(X=new_planet)
new_planet_scaled = scaler.transform(X=new_planet_imputed)

In [37]:
# Hard class label (0 or 1) and the per-class probabilities for the single
# preprocessed sample.
prediction = model.predict(new_planet_scaled)
probability = model.predict_proba(new_planet_scaled)

# The leading symbols are written as Unicode escapes (U+1F30D globe, U+274C
# cross mark). The previous literals appear to be those emoji's UTF-8 bytes
# decoded with a legacy 8-bit codec; escapes keep the source ASCII-only so
# the text cannot be garbled by a wrong file encoding again.
if prediction[0] == 1:
    print("\U0001F30D Prediction: Potentially Habitable Exoplanet")
else:
    print("\u274C Prediction: Not Habitable")

# predict_proba returns one row per sample with one column per class.
print("Prediction Probability [Not Habitable, Habitable]:", probability)

üåç Prediction: Potentially Habitable Exoplanet
Prediction Probability [Not Habitable, Habitable]: [[0.25562452 0.74437548]]
