In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [3]:
# Load the exoplanet catalogue from the Excel workbook.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact directory layout exists; consider a configurable
# data directory.
DATA_PATH = "C:/Users/baner/Documents/Planet-Hunt/data/Study_of_exoplanets.xlsx"
df = pd.read_excel(DATA_PATH)

# Data preprocessing: fill the missing values of each listed column with that
# column's own median (missing values are ignored when the median is computed).
IMPUTED_COLUMNS = [
    'P_MASS', 'P_RADIUS', 'P_PERIOD', 'P_SEMI_MAJOR_AXIS', 'P_ECCENTRICITY',
    'P_INCLINATION', 'P_ESCAPE', 'P_POTENTIAL', 'P_GRAVITY', 'P_DENSITY',
]

# One median per column, indexed by column name. `fillna` aligns on that index,
# so every column is filled with its own median in a single pass. An individual
# value is available as e.g. column_medians['P_MASS'].
column_medians = df[IMPUTED_COLUMNS].median()
df[IMPUTED_COLUMNS] = df[IMPUTED_COLUMNS].fillna(column_medians)

# NOTE(review): the medians are computed over the whole table, i.e. before the
# train/test split performed later in the notebook, so held-out rows influence
# the imputed values. Fitting the imputation on the training rows only would
# avoid that leakage.

In [4]:
# Single seed shared by the data split and by Keras, so that Restart & Run All
# reproduces the same split and the same training run.
SEED = 42

# Width of the softmax output layer.
# NOTE(review): presumably P_HABITABLE holds the integer labels 0, 1 and 2;
# sparse_categorical_crossentropy needs integer labels in [0, NUM_CLASSES).
# TODO confirm against the data.
NUM_CLASSES = 3

# Features and target variable
FEATURE_COLUMNS = [
    'P_MASS', 'P_RADIUS', 'P_PERIOD', 'P_SEMI_MAJOR_AXIS', 'P_ECCENTRICITY',
    'P_INCLINATION', 'P_ESCAPE', 'P_POTENTIAL', 'P_GRAVITY', 'P_DENSITY',
]
X = df[FEATURE_COLUMNS]
y = df['P_HABITABLE']

# Split the data into training and test sets (80/20). `stratify=y` keeps the
# class proportions of the target the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so test statistics never leak in.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed Python, NumPy and the Keras backend before any weights are created;
# without this every run starts from different random weights.
keras.utils.set_random_seed(SEED)

# DL model: a fully connected classifier, 64 -> 32 -> 16 ReLU units.
model = keras.Sequential([
    # Input: one value per feature column. `keras.Input(shape=...)` replaces
    # `InputLayer(input_shape=...)`, whose `input_shape` argument is deprecated
    # in Keras 3.
    keras.Input(shape=(X_train_scaled.shape[1],)),

    # Hidden layers
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(16, activation='relu'),

    # Output layer: one probability per class
    keras.layers.Dense(NUM_CLASSES, activation='softmax'),
])

# Compile the model. The "sparse" loss takes integer class labels directly,
# so `y_train` does not need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
# NOTE(review): `validation_split` holds out the last 20% of the training rows
# as they are ordered after the split above; that hold-out is not stratified,
# so rare classes may be under-represented in the validation metrics.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
)



Epoch 1/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9714 - loss: 0.5902 - val_accuracy: 0.9784 - val_loss: 0.1505
Epoch 2/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9858 - loss: 0.1074 - val_accuracy: 0.9784 - val_loss: 0.1453
Epoch 3/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9886 - loss: 0.0764 - val_accuracy: 0.9784 - val_loss: 0.1386
Epoch 4/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9900 - loss: 0.0661 - val_accuracy: 0.9784 - val_loss: 0.1293
Epoch 5/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9914 - loss: 0.0573 - val_accuracy: 0.9784 - val_loss: 0.1238
Epoch 6/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9843 - loss: 0.0876 - val_accuracy: 0.9784 - val_loss: 0.1265
Epoch 7/50
[1m81/81[0m [32m━━━━━━━━━━

In [5]:
# Evaluate the model on the test set: overall loss and accuracy.
test_loss, test_acc = model.evaluate(X_test_scaled, y_test)
print(f"Test accuracy: {test_acc}")

# Accuracy alone can hide a model that only ever predicts the dominant class,
# so also report precision / recall / F1 for every class.
# `model.predict` returns one probability per class for each row; the index of
# the largest probability is the predicted integer label.
y_pred = np.argmax(model.predict(X_test_scaled), axis=1)

# zero_division=0 reports 0 (instead of emitting a warning) for a class the
# model never predicts.
print(classification_report(y_test, y_pred, zero_division=0))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9842 - loss: 0.0622 
Test accuracy: 0.9839506149291992
