In [29]:
# Import our dependencies

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from google.colab import files

# Upload file
uploaded = files.upload()

# Load dataset (choose the Cleaned Dataset from Exoplanet/machine_analysis/Resources)
df = pd.read_csv('Cleaned Dataset.csv')

# Display the first few rows
df.head()

Saving Cleaned Dataset.csv to Cleaned Dataset.csv


Unnamed: 0,Planet_Name,Host_Star,Num_Planets,Discovery_Method,Discovery_Year,Discovery_Facility,Reference_Name,Orbital_Period_Days,Orbital_Period_Error_Upper,Orbital_Period_Error_Lower,...,ra,dec,System_Distance_PC,System_Visual_Magnitude,System_Infrared_Magnitude,Discovery_Date,features,scaled_features,Discovery_Method_Index,Discovery_Facility_Index
0,Kepler-138 c,Kepler-138,4,Transit,2014,Kepler,<a refstr=PIAULET_ET_AL__2022 href=https://ui....,13.7815,7e-05,-9e-05,...,290.381412,43.293064,66.8624,13.04,9.506,2014-01-01,"[1.51,2.3,3841.0]","[-1.23884583576289,-0.5751699806789807,-1.9022...",0.0,1.0
1,TIC 139270665 b,TIC 139270665,2,Transit,2024,Transiting Exoplanet Survey Satellite (TESS),<a refstr=PELUSO_ET_AL__2024 href=https://ui.a...,23.624,0.03,-0.031,...,124.033897,33.291453,189.875,10.385,8.946,2024-01-01,"[7.23,147.15455,5844.0]","[-0.1993775067875646,-0.27549828331573123,0.55...",0.0,0.0
2,TOI-3819 b,TOI-3819,1,Transit,2023,Transiting Exoplanet Survey Satellite (TESS),<a refstr=YEE_ET_AL__2023 href=https://ui.adsa...,3.244314,5.5e-06,-5.5e-06,...,121.863257,29.388645,558.141,12.542,11.077,2023-01-01,"[13.137,352.78953,5859.0]","[0.8740734406350308,0.14991454281819322,0.5692...",0.0,0.0
3,HD 136352 c,HD 136352,3,Radial Velocity,2019,La Silla Observatory,<a refstr=DELREZ_ET_AL__2021 href=https://ui.a...,27.59221,0.00011,-0.00011,...,230.440115,-48.318817,14.682,5.65,4.159,2019-01-01,"[2.916,11.24,5664.0]","[-0.9833401590951787,-0.5566751182502379,0.330...",1.0,14.0
4,HAT-P-68 b,HAT-P-68,1,Transit,2020,HATNet,<a refstr=LINDOR_ET_AL__2021 href=https://ui.a...,2.298406,5.2e-07,-5.2e-07,...,118.48315,23.938219,202.152,13.802,11.019,2020-01-01,"[12.016,230.10892,4508.0]","[0.6703594551837476,-0.10388422727400506,-1.08...",0.0,5.0


In [30]:
# Group by 'Host_Star' to aggregate planet counts and star features
star_data = df.groupby('Host_Star').agg({
    'Num_Planets': 'first',
    'Star_Temperature_K': 'first',
    'Star_Radius_Solar': 'first',
    'Star_Mass_Solar': 'first',
    'Star_Metallicity': 'first'
}).reset_index()

# Normalize features
features = ['Star_Temperature_K', 'Star_Radius_Solar', 'Star_Mass_Solar', 'Star_Metallicity']
X = star_data[features].values
y = star_data['Num_Planets'].values

# Normalize the features
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Dense(64, activation='relu', input_shape=(len(features),)),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1)  # Output layer for regression
])

model.summary()

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse' , metrics=["accuracy"])

history = model.fit(
    X_train, y_train,
    validation_split=0.2,  # Using part of training data for validation
    epochs=100,
    batch_size=32,
    verbose=1
)

model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Save model
model.save('nn_exo_planet_model.keras')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 1.6807e-04 - loss: 2.7746 - val_accuracy: 0.0738 - val_loss: 1.7565
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.3954 - loss: 1.7077 - val_accuracy: 0.7517 - val_loss: 1.0418
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7604 - loss: 1.0021 - val_accuracy: 0.7584 - val_loss: 0.8484
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7906 - loss: 0.9304 - val_accuracy: 0.7584 - val_loss: 0.8309
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7798 - loss: 0.8844 - val_accuracy: 0.7584 - val_loss: 0.8156
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7774 - loss: 1.0248 - val_accuracy: 0.7584 - val_loss: 0.8002
Epoch 7/100
[1m19/19[0m 