In [14]:
pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [15]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import joblib


In [16]:
# Set random seeds for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [17]:
import tensorflow as tf
print(tf.__version__)


2.15.0


# Read the CSV and Perform Basic Data Cleaning

In [120]:
df = pd.read_csv("exoplanet_data.csv")
# Drop columns and rows with all null values
df.dropna(axis='columns', how='all', inplace=True)
df.dropna(inplace=True)
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [121]:
# Map the values in 'koi_disposition' to 'YES' and 'NO'
df['koi_disposition'] = df['koi_disposition'].map({
    'FALSE POSITIVE': 'NO',
    'CANDIDATE': 'NO',
    'CONFIRMED': 'YES'
})

# Verify the mapping was successful
print(df['koi_disposition'].unique())


['YES' 'NO']


In [127]:
# Verify that the mapping was successful and check the data types
print(df['koi_disposition'].unique())
print(df['koi_disposition'].dtype)


['YES' 'NO']
object


# Select your features (columns)

In [129]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period', 'koi_period_err1', 'koi_period_err2',
       'koi_time0bk', 'koi_time0bk_err1', 'koi_time0bk_err2', 'koi_impact',
       'koi_impact_err1', 'koi_impact_err2', 'koi_duration',
       'koi_duration_err1', 'koi_duration_err2', 'koi_depth', 'koi_depth_err1',
       'koi_depth_err2', 'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
       'koi_teq', 'koi_insol', 'koi_insol_err1', 'koi_insol_err2',
       'koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 'koi_steff_err1',
       'koi_steff_err2', 'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
       'koi_srad', 'koi_srad_err1', 'koi_srad_err2', 'ra', 'dec',
       'koi_kepmag']]
selected_features.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,-0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,-0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,-0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,-0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,-0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [130]:
#Define feature matrix X and target vector y
X = selected_features
y = df['koi_disposition']


In [131]:
# Encode the target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Create a Train Test Split

Use `koi_disposition` for the y values

In [132]:
# Split the data into training and testing sets with stratification
X_train, X_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.3, random_state=SEED, stratify=y_encoded)


In [79]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag,koi_disposition
1039,0,0,0,0,34.173216,0.00012,-0.00012,137.78862,0.00291,-0.00291,...,4.573,0.01,-0.09,0.834,0.077,-0.026,288.3725,47.933559,13.733,CONFIRMED
4589,0,0,1,1,5.243319,9.2e-05,-9.2e-05,132.7859,0.0152,-0.0152,...,4.613,0.035,-0.105,0.735,0.123,-0.057,291.82629,41.7173,15.196,FALSE POSITIVE
2460,0,0,0,0,6.51003,1.3e-05,-1.3e-05,172.91588,0.00154,-0.00154,...,4.327,0.149,-0.182,1.153,0.31,-0.207,282.11133,42.3545,14.825,CONFIRMED
3409,0,0,1,0,1.190874,1.3e-05,-1.3e-05,131.71838,0.0078,-0.0078,...,4.464,0.105,-0.105,0.888,0.122,-0.1,294.59283,45.28772,15.28,FALSE POSITIVE
1983,0,0,0,0,4.603575,5e-06,-5e-06,171.840124,0.000813,-0.000813,...,4.602,0.014,-0.046,0.741,0.039,-0.03,297.354,41.300049,12.97,CONFIRMED


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [133]:
# Scale features using MinMaxScaler and encode the labels using one-hot encoding
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [134]:
# # Step 1: Label-encode the data set
# label_encoder = LabelEncoder()

# encoded_y_train = label_encoder.fit_transform(y_train)
# encoded_y_test = label_encoder.transform(y_test)

In [135]:
# # Step 2: Convert encoded labels to one-hot-encoding
# y_train_categorical = to_categorical(encoded_y_train)
# y_test_categorical = to_categorical(encoded_y_test)

# Train the Model



In [136]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
model = Sequential([
    Dense(units=128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(units=64, activation='relu'),
    Dropout(0.3),
    Dense(units=32, activation='relu'),
    Dense(units=3, activation='softmax')
])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               4100      
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 2)                 202       
                                                                 
Total params: 14402 (56.26 KB)
Trainable params: 14402 (56.26 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [141]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=40,
    shuffle=True,
    verbose=2
)

Epoch 1/40
153/153 - 1s - loss: 0.3867 - accuracy: 0.7956 - 1s/epoch - 8ms/step
Epoch 2/40
153/153 - 0s - loss: 0.3203 - accuracy: 0.8279 - 165ms/epoch - 1ms/step
Epoch 3/40
153/153 - 0s - loss: 0.3172 - accuracy: 0.8367 - 162ms/epoch - 1ms/step
Epoch 4/40
153/153 - 0s - loss: 0.3070 - accuracy: 0.8434 - 163ms/epoch - 1ms/step
Epoch 5/40
153/153 - 0s - loss: 0.3000 - accuracy: 0.8514 - 159ms/epoch - 1ms/step
Epoch 6/40
153/153 - 0s - loss: 0.2961 - accuracy: 0.8520 - 165ms/epoch - 1ms/step
Epoch 7/40
153/153 - 0s - loss: 0.2890 - accuracy: 0.8559 - 158ms/epoch - 1ms/step
Epoch 8/40
153/153 - 0s - loss: 0.2867 - accuracy: 0.8645 - 153ms/epoch - 1ms/step
Epoch 9/40
153/153 - 0s - loss: 0.2832 - accuracy: 0.8594 - 154ms/epoch - 1ms/step
Epoch 10/40
153/153 - 0s - loss: 0.2756 - accuracy: 0.8745 - 153ms/epoch - 1ms/step
Epoch 11/40
153/153 - 0s - loss: 0.2760 - accuracy: 0.8667 - 154ms/epoch - 1ms/step
Epoch 12/40
153/153 - 0s - loss: 0.2721 - accuracy: 0.8727 - 153ms/epoch - 999us/step
Ep

<keras.src.callbacks.History at 0x140cf8940>

In [48]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

55/55 - 0s - loss: 0.2198 - accuracy: 0.9079 - 294ms/epoch - 5ms/step
Normal Neural Network - Loss: 0.21980665624141693, Accuracy: 0.9078947305679321


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [142]:
# Assuming y is your target variable

y = np.array(y).ravel()

In [143]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

# Assuming X and yy are defined and are your features and target variables
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['linear', 'rbf']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)

# Fit the grid search to the data
grid.fit(X_train, y_train)

# Optional: Extract and use the best parameters or the best estimator
# print(grid.best_params_)
# best_estimator = grid.best_estimator_


Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [None]:
print(grid.best_params_)
print(grid.best_score_)

NameError: name 'grid' is not defined

# Save the Model

In [None]:
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'model_1.sav'
joblib.dump(model_1, filename)