# Deep Neural Network for Exoplanet Discovery Method Classification
## Identifying Patterns in Exoplanet Characteristics to Determine Discovery Methods

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import optuna as opt

  from .autonotebook import tqdm as notebook_tqdm


## Data

In [2]:
# data preprocessed in another jupytr nb 

composite_preprocessed = pd.read_csv('Composite_preprocessed_NO_MV_BALANCED.csv')
composite_preprocessed.head()

Unnamed: 0,Number of Stars,Number of Planets,Number of Moons,Galactic Latitude [deg],Galactic Longitude [deg],Ecliptic Latitude [deg],Ecliptic Longitude [deg],Number of Photometry Time Series,Number of Radial Velocity Time Series,Number of Stellar Spectra Measurements,Number of Emission Spectroscopy Measurements,Number of Transmission Spectroscopy Measurements,Circumbinary Flag,Controversial Flag,Discovery Year,Detected by Transits
0,3.094076,-0.666894,0.0,2.424559,1.234306,-0.145901,-0.624689,0.613405,1.731519,-0.2616,-0.012466,-0.044364,0,0,2007,0
1,-0.256668,-0.666894,0.0,1.18672,-0.283545,1.148661,-0.992906,0.613405,0.729626,-0.2616,-0.012466,-0.044364,0,0,2009,0
2,-0.256668,-0.666894,0.0,-0.877523,-0.306068,0.308947,-2.327878,0.613405,0.729626,-0.2616,-0.012466,-0.044364,0,0,2008,0
3,-0.256668,0.216988,0.0,1.382856,-0.669803,0.872499,-0.152934,0.613405,3.735304,0.591749,-0.012466,-0.044364,0,0,2002,0
4,6.44482,-0.666894,0.0,0.261241,-0.531444,1.023143,0.855489,0.613405,3.735304,2.298449,-0.012466,-0.044364,0,0,1996,0


### 2 possible results ; Either the exoplanet was discovered by transits (1) or it wasn't (0) ; binary classification
### so we have 1 neuron in the output layer  
### We will use the features from the XGBoost refined features model 
### This ensures consistency, reduces complexity, and leverages the feature selection process that has already been validated with my refined features XGBoost model
### This would also allow for a fair comparison between models and is likely to result in better performance for the DNN

## Remove unimportant features
### From the logistic regression model, 'Number of Moons' can be excluded because it does not provide value to the model

In [3]:
remove = ['Number of Radial Velocity Time Series',
          'Number of Stellar Spectra Measurements',
          'Controversial Flag',
          'Circumbinary Flag',
          'Number of Moons']

composite_unnecessary_removed = composite_preprocessed.drop(remove, axis=1)

## Train Test Split

In [4]:
# we are trying to predict whether an exoplanet has been detected by transits (0 or 1)
# target variable is if the exoplanet was discovered by transits

targets = composite_unnecessary_removed['Detected by Transits']

# training features are all variables except the targets

features = composite_unnecessary_removed.drop(['Detected by Transits'], axis=1)

## Observe Feature Correlations

In [5]:
features.corr()

Unnamed: 0,Number of Stars,Number of Planets,Galactic Latitude [deg],Galactic Longitude [deg],Ecliptic Latitude [deg],Ecliptic Longitude [deg],Number of Photometry Time Series,Number of Emission Spectroscopy Measurements,Number of Transmission Spectroscopy Measurements,Discovery Year
Number of Stars,1.0,0.109074,-0.044988,0.104236,-0.087528,-0.06945,0.057734,-0.0032,-0.002096,-0.147575
Number of Planets,0.109074,1.0,-0.055511,0.008589,0.02668,-0.023926,-0.002505,0.057797,0.076409,-0.072274
Galactic Latitude [deg],-0.044988,-0.055511,1.0,-0.019213,0.463063,0.174632,-0.090369,-0.025691,-0.008918,0.03182
Galactic Longitude [deg],0.104236,0.008589,-0.019213,1.0,-0.657044,-0.408779,0.131333,-0.008283,-0.026088,-0.106558
Ecliptic Latitude [deg],-0.087528,0.02668,0.463063,-0.657044,1.0,0.498442,-0.222283,-0.006864,0.025212,0.081113
Ecliptic Longitude [deg],-0.06945,-0.023926,0.174632,-0.408779,0.498442,1.0,-0.119612,0.013811,0.031078,0.058041
Number of Photometry Time Series,0.057734,-0.002505,-0.090369,0.131333,-0.222283,-0.119612,1.0,-0.004172,-0.014848,-0.252983
Number of Emission Spectroscopy Measurements,-0.0032,0.057797,-0.025691,-0.008283,-0.006864,0.013811,-0.004172,1.0,-0.000553,0.000791
Number of Transmission Spectroscopy Measurements,-0.002096,0.076409,-0.008918,-0.026088,0.025212,0.031078,-0.014848,-0.000553,1.0,-0.015425
Discovery Year,-0.147575,-0.072274,0.03182,-0.106558,0.081113,0.058041,-0.252983,0.000791,-0.015425,1.0


### Moderate correlation between Ecliptic Longtiude aand Ecliptic Latitude

## Split data

In [6]:
# Splitting dataset into training, testing and validation addresses overfitting
# shuffling is necessary to remove dependencies that come from order of data


# 80:10:10 split ; Training ; Validation ; Testing
# so we split twice; once into 80:20, then 50:50 (so 50% of the designated testing portion is for validation)

# first split
from sklearn.model_selection import train_test_split
x_train, x_temp, y_train, y_temp = train_test_split(features, targets, test_size = 0.8, random_state = 42, shuffle=True)

# second split
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size = 0.5, random_state=42, shuffle=True)

# tensorflow models expect data in the form of a np array; NOT a pd dataframe 
# so we have to convert

def conv_nparr(df):
    return np.array(df)

data_splits = [x_train, x_val, x_test, y_train, y_val, y_test]

# list comprehension to transform all the dfs into nparr
x_train_nparr, x_val_nparr, x_test_nparr, y_train_nparr, y_val_nparr, y_test_nparr = [conv_nparr(split) for split in data_splits]

# Random seed for reproducability

In [None]:
import random

def set_random_seed(seed):
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)

set_random_seed(42)

## DNN Configuration

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# we use optuna for hyperparameter optimization
from optuna.integration import TFKerasPruningCallback

### Create DNN

In [8]:
# Initializes a Sequential Model (linear stack of layers)
# Simplest way to build a model in Keras, where you can just keep adding layers sequentially

input_size = x_train.shape[1] # 10 input features in our data 
# hidden_layer_size = 50 # start off with 50 neurons per hidden layer
output_size = 1 # because 2 possible classifications denoted by 0 and 1 in a single output; DNN calculates probability the exoplanet is 0 or 1 and the higher probability is the output 

# dropout_rate = 0.05 # dropout rate we will use in the model as a measure to prevent overfitting

def create_dnn(trial):

    # since we use optuna, we have to toss in the values we want it to test during the optimization process

    # units = neurons in the hidden later, 32 to 512 jumping 32 at a time 
    units = trial.suggest_int('units', 10, 100, step=5)

    # dropout rate is self explanatory
    dropout_rate = trial.suggest_float('dropout', 0.0, 0.5, step=0.05)

    # define our learning rate which impacts the final performance of our model, optimal LR is very beneficial in finding the right patterns
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True) # Log scale is better search 
    dnn_model = Sequential([
    # input layer for the input data; 10 features so 10 inputs for the first dense layer 
    # in a dense layer, each feature becomes an input to the next layer, which consists of neurons
    # the line below defines the input layer and creates the first dense layer consisting of 50 neurons
    # we use Rectified Linear Unit (ReLU) as the activation function because ReLU helps mitigate the vanishing gradient problem
# and allows the model to learn complex paterns
    tf.keras.layers.Dense(units, activation='relu', input_shape = (input_size,)),

# add a dropout layer with dropout rate of 0.1 
# This essentially just means 10% of the neurons will be randomly set to 0 during training
# Dropout is a regularization technique used to prevent overfitting 
# This helps prevent the model from overfitting by randomly dropping neurons during training
    tf.keras.layers.Dropout(dropout_rate),
    
# we only use 1 hidden layer of neurons because 2 lowers validation accuracy;
    
    # output layer with 1 neuron for binary classification
    tf.keras.layers.Dense(output_size, activation='sigmoid')
    ])
# next, choose the optimizer and loss function
# adam is usually the most optimal
# loss will be binary_crossentropy for binary classification
# the metric we care about is accuracy
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    dnn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return dnn_model

def objective(trial):
    model = create_dnn(trial)


    # suggest number of epochs and batch size
    epochs = trial.suggest_int('epochs', 5, 50, step=5)
    batch_size = trial.suggest_int('batch_size', 5,30,step=5)
    
    training = model.fit(
            x_train_nparr, 
            y_train_nparr, 
            epochs=epochs, 
            batch_size=batch_size, 
            validation_data=(x_val_nparr, y_val_nparr), 
            callbacks=[TFKerasPruningCallback(trial, 'val_accuracy')], 
            verbose=2
    )
    val_accuracy = max(training.history['val_accuracy'])
    return val_accuracy

# create a study and optimize the objective function
study = opt.create_study(direction='maximize')
study.optimize(objective, n_trials=200)


(f'Best trial: {study.best_trial.value}', f'Best hyperparameters: {study.best_trial.params}')


[I 2024-08-06 22:20:13,432] A new study created in memory with name: no-name-7393e54d-7c63-4a13-a90d-ba8be222bb40
2024-08-06 22:20:13.463445: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-06 22:20:13.464209: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


Train on 1287 samples, validate on 2574 samples
Epoch 1/40
1287/1287 - 2s - loss: 537.2110 - accuracy: 0.4934 - val_loss: 494.1637 - val_accuracy: 0.5085
Epoch 2/40
1287/1287 - 0s - loss: 516.0747 - accuracy: 0.5012 - val_loss: 485.6730 - val_accuracy: 0.5085
Epoch 3/40
1287/1287 - 0s - loss: 523.2729 - accuracy: 0.4942 - val_loss: 477.2250 - val_accuracy: 0.5085
Epoch 4/40
1287/1287 - 0s - loss: 523.0213 - accuracy: 0.4934 - val_loss: 469.1120 - val_accuracy: 0.5085
Epoch 5/40
1287/1287 - 0s - loss: 501.7653 - accuracy: 0.5004 - val_loss: 461.1076 - val_accuracy: 0.5085
Epoch 6/40
1287/1287 - 0s - loss: 492.1315 - accuracy: 0.5051 - val_loss: 452.7862 - val_accuracy: 0.5085
Epoch 7/40
1287/1287 - 0s - loss: 479.1819 - accuracy: 0.4942 - val_loss: 444.5871 - val_accuracy: 0.5085
Epoch 8/40
1287/1287 - 0s - loss: 486.9675 - accuracy: 0.5082 - val_loss: 436.1751 - val_accuracy: 0.5085
Epoch 9/40
1287/1287 - 0s - loss: 478.5277 - accuracy: 0.4848 - val_loss: 428.1781 - val_accuracy: 0.508

[I 2024-08-06 22:20:33,452] Trial 0 finished with value: 0.5085470080375671 and parameters: {'units': 20, 'dropout': 0.45, 'learning_rate': 1.5106914009390135e-05, 'epochs': 40, 'batch_size': 15}. Best is trial 0 with value: 0.5085470080375671.


Train on 1287 samples, validate on 2574 samples
Epoch 1/30
1287/1287 - 1s - loss: 86.3879 - accuracy: 0.5027 - val_loss: 31.1556 - val_accuracy: 0.4915
Epoch 2/30
1287/1287 - 0s - loss: 89.6576 - accuracy: 0.4903 - val_loss: 28.2212 - val_accuracy: 0.4915
Epoch 3/30
1287/1287 - 0s - loss: 87.6862 - accuracy: 0.4965 - val_loss: 24.9304 - val_accuracy: 0.4915
Epoch 4/30
1287/1287 - 0s - loss: 83.4499 - accuracy: 0.5058 - val_loss: 22.3713 - val_accuracy: 0.4915
Epoch 5/30
1287/1287 - 0s - loss: 84.6665 - accuracy: 0.5012 - val_loss: 20.1648 - val_accuracy: 0.4915
Epoch 6/30
1287/1287 - 0s - loss: 89.0711 - accuracy: 0.4724 - val_loss: 18.7612 - val_accuracy: 0.4915
Epoch 7/30
1287/1287 - 0s - loss: 84.1273 - accuracy: 0.4996 - val_loss: 17.1613 - val_accuracy: 0.4915
Epoch 8/30
1287/1287 - 0s - loss: 76.8489 - accuracy: 0.5120 - val_loss: 15.5407 - val_accuracy: 0.4915
Epoch 9/30
1287/1287 - 0s - loss: 80.2673 - accuracy: 0.4918 - val_loss: 14.1669 - val_accuracy: 0.4915
Epoch 10/30
1287

[I 2024-08-06 22:20:42,910] Trial 1 finished with value: 0.49145299196243286 and parameters: {'units': 75, 'dropout': 0.30000000000000004, 'learning_rate': 1.4560999082689774e-05, 'epochs': 30, 'batch_size': 30}. Best is trial 0 with value: 0.5085470080375671.


Train on 1287 samples, validate on 2574 samples
Epoch 1/10
1287/1287 - 1s - loss: 269.4848 - accuracy: 0.5051 - val_loss: 120.5761 - val_accuracy: 0.4915
Epoch 2/10
1287/1287 - 0s - loss: 251.6336 - accuracy: 0.5019 - val_loss: 65.4939 - val_accuracy: 0.4915
Epoch 3/10
1287/1287 - 1s - loss: 232.9655 - accuracy: 0.4864 - val_loss: 43.2826 - val_accuracy: 0.4915
Epoch 4/10
1287/1287 - 0s - loss: 217.5188 - accuracy: 0.4903 - val_loss: 26.0540 - val_accuracy: 0.4915
Epoch 5/10
1287/1287 - 0s - loss: 217.1015 - accuracy: 0.5035 - val_loss: 12.5992 - val_accuracy: 0.4915
Epoch 6/10
1287/1287 - 0s - loss: 210.3301 - accuracy: 0.4903 - val_loss: 7.3794 - val_accuracy: 0.4915
Epoch 7/10
1287/1287 - 0s - loss: 182.2647 - accuracy: 0.4965 - val_loss: 3.9629 - val_accuracy: 0.4915
Epoch 8/10
1287/1287 - 0s - loss: 184.6144 - accuracy: 0.5012 - val_loss: 10.7073 - val_accuracy: 0.5085
Epoch 9/10
1287/1287 - 0s - loss: 170.1130 - accuracy: 0.4856 - val_loss: 1.3492 - val_accuracy: 0.4915
Epoch 10/

[I 2024-08-06 22:20:48,890] Trial 2 finished with value: 0.5236985087394714 and parameters: {'units': 30, 'dropout': 0.5, 'learning_rate': 0.00019771998101912301, 'epochs': 10, 'batch_size': 15}. Best is trial 2 with value: 0.5236985087394714.


Train on 1287 samples, validate on 2574 samples
Epoch 1/30
1287/1287 - 1s - loss: 50.2506 - accuracy: 0.4934 - val_loss: 13.7448 - val_accuracy: 0.4915
Epoch 2/30
1287/1287 - 0s - loss: 2.9785 - accuracy: 0.6224 - val_loss: 0.5611 - val_accuracy: 0.8520
Epoch 3/30
1287/1287 - 0s - loss: 0.5863 - accuracy: 0.8127 - val_loss: 0.6641 - val_accuracy: 0.7723
Epoch 4/30
1287/1287 - 0s - loss: 1.1600 - accuracy: 0.6861 - val_loss: 0.9204 - val_accuracy: 0.6865
Epoch 5/30
1287/1287 - 0s - loss: 0.9342 - accuracy: 0.7226 - val_loss: 0.9770 - val_accuracy: 0.6601
Epoch 6/30
1287/1287 - 0s - loss: 0.9118 - accuracy: 0.7273 - val_loss: 2.2018 - val_accuracy: 0.6072
Epoch 7/30
1287/1287 - 0s - loss: 1.7746 - accuracy: 0.7071 - val_loss: 0.7238 - val_accuracy: 0.7758
Epoch 8/30
1287/1287 - 0s - loss: 1.7693 - accuracy: 0.7187 - val_loss: 3.3831 - val_accuracy: 0.4918
Epoch 9/30
1287/1287 - 0s - loss: 1.7372 - accuracy: 0.7016 - val_loss: 0.7003 - val_accuracy: 0.8796
Epoch 10/30
1287/1287 - 0s - los

[I 2024-08-06 22:20:58,681] Trial 3 finished with value: 0.8853923678398132 and parameters: {'units': 40, 'dropout': 0.0, 'learning_rate': 0.005501988795231349, 'epochs': 30, 'batch_size': 30}. Best is trial 3 with value: 0.8853923678398132.


Train on 1287 samples, validate on 2574 samples
Epoch 1/20
1287/1287 - 3s - loss: 175.4525 - accuracy: 0.5089 - val_loss: 84.1453 - val_accuracy: 0.5085
Epoch 2/20
1287/1287 - 1s - loss: 152.9074 - accuracy: 0.5066 - val_loss: 47.3681 - val_accuracy: 0.5085
Epoch 3/20
1287/1287 - 1s - loss: 143.0460 - accuracy: 0.5237 - val_loss: 32.3823 - val_accuracy: 0.5085
Epoch 4/20
1287/1287 - 1s - loss: 140.2748 - accuracy: 0.4981 - val_loss: 24.7775 - val_accuracy: 0.5085
Epoch 5/20
1287/1287 - 1s - loss: 131.7348 - accuracy: 0.5074 - val_loss: 18.4733 - val_accuracy: 0.5085
Epoch 6/20
1287/1287 - 1s - loss: 133.1861 - accuracy: 0.4709 - val_loss: 10.3137 - val_accuracy: 0.5085
Epoch 7/20
1287/1287 - 1s - loss: 120.6075 - accuracy: 0.5097 - val_loss: 8.9505 - val_accuracy: 0.5085
Epoch 8/20
1287/1287 - 1s - loss: 103.2057 - accuracy: 0.5144 - val_loss: 4.1518 - val_accuracy: 0.5085
Epoch 9/20
1287/1287 - 1s - loss: 113.4721 - accuracy: 0.4942 - val_loss: 0.9881 - val_accuracy: 0.5128
Epoch 10/2

[I 2024-08-06 22:21:26,618] Trial 4 finished with value: 0.8908314108848572 and parameters: {'units': 20, 'dropout': 0.4, 'learning_rate': 9.009789315113767e-05, 'epochs': 20, 'batch_size': 5}. Best is trial 4 with value: 0.8908314108848572.


Train on 1287 samples, validate on 2574 samples
Epoch 1/35
1287/1287 - 3s - loss: 42.2158 - accuracy: 0.5051 - val_loss: 0.5345 - val_accuracy: 0.7607
Epoch 2/35
1287/1287 - 1s - loss: 0.9717 - accuracy: 0.5299 - val_loss: 0.7174 - val_accuracy: 0.4918
Epoch 3/35
1287/1287 - 1s - loss: 0.8620 - accuracy: 0.5260 - val_loss: 0.7665 - val_accuracy: 0.4918
Epoch 4/35
1287/1287 - 1s - loss: 0.8524 - accuracy: 0.4942 - val_loss: 0.7245 - val_accuracy: 0.5089
Epoch 5/35
1287/1287 - 1s - loss: 0.7713 - accuracy: 0.5043 - val_loss: 0.7521 - val_accuracy: 0.4918
Epoch 6/35
1287/1287 - 1s - loss: 0.7157 - accuracy: 0.5284 - val_loss: 0.6851 - val_accuracy: 0.5097
Epoch 7/35
1287/1287 - 1s - loss: 0.7198 - accuracy: 0.5058 - val_loss: 0.6917 - val_accuracy: 0.5109
Epoch 8/35
1287/1287 - 1s - loss: 0.7026 - accuracy: 0.4864 - val_loss: 0.6896 - val_accuracy: 0.5085
Epoch 9/35
1287/1287 - 1s - loss: 0.6925 - accuracy: 0.5338


[W 2024-08-06 22:21:38,993] Trial 5 failed with parameters: {'units': 20, 'dropout': 0.25, 'learning_rate': 0.006043342917512197, 'epochs': 35, 'batch_size': 5} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/kevin/anaconda3/envs/tf_env/lib/python3.7/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/0h/qrfpw0p522n_hd8ccn5ffyvh0000gn/T/ipykernel_87249/684779761.py", line 64, in objective
    verbose=2
  File "/Users/kevin/anaconda3/envs/tf_env/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 728, in fit
    use_multiprocessing=use_multiprocessing)
  File "/Users/kevin/anaconda3/envs/tf_env/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 337, in fit
    eval_data_iter = iter(validation_dataset)
  File "/Users/kevin/anaconda3/envs/tf_env/lib/python3.7/site-packages/tensorflow_core/python/data/o

KeyboardInterrupt: 

### Extract the best hyperparameters determined by Optuna

In [None]:
best_params = study.best_trial.params

### Train the model now using the best hyperparameters

In [None]:
# access the parameters using keys (study best trial params is a dictionary)


best_dnn = Sequential([
    
    tf.keras.layers.Dense(units= best_params['units'], activation='relu', input_shape = (input_size,)),

    tf.keras.layers.Dropout(best_params['dropout']),
    
    tf.keras.layers.Dense(output_size, activation='sigmoid')
    ])

best_optimizer = tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate'])
best_dnn.compile(optimizer=best_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

### Fit the best model

In [None]:
best_dnn_model = best_dnn.fit(
        x_train_nparr, 
        y_train_nparr, 
        epochs=best_params['epochs'], 
        batch_size=best_params['batch_size'], 
        validation_data=(x_val_nparr, y_val_nparr), 
        verbose=2
    )

## Using .evaluate to examine model performance

In [None]:
test_loss, test_accuracy = best_dnn_model.evaluate(x_test_nparr, y_test_nparr)
('Test loss:',test_loss),
('Test accuracy:',test_accuracy)

## Using .predict to evaluate model performance

In [None]:
# Here we actually make the predictions on the test set
# model outputs the probability for the positive class
# also convert the probabilities to binary class predictions with a threshold of 0.5

# gives us the probability the exoplanet belongs to each class
y_pred_prob = best_dnn_model.predict(x_test_nparr)

### Actual class predictions

In [None]:
# We use these probabilities for calculating the ROC-AUC score and other performance metrics that require probabilities
# ravel() flattens the matrix to a 1D array of probabilities

y_pred = (y_pred_prob > 0.5).astype("int32")

In [None]:
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
cm = confusion_matrix(y_test_nparr, y_pred)
dnn_accuracy = accuracy_score(y_test_nparr, y_pred)
classif_report = classification_report(y_test_nparr, y_pred)
roc_score = roc_auc_score(y_test_nparr, y_pred_prob)

cm,
('DNN Accuracy on Test Set:', dnn_accuracy),
('DNN Classification Report on Test Set:\n', classif_report),
('DNN ROC AUC Score on Test Set:', roc_score), 

## Plot Validation and Training Metrics for visual presentation 

In [None]:
plt.figure(figsize=(10,6))

# training accuracy over epochs
plt.plot(best_dnn_model.history['accuracy'])

# validation accuracy over epochs
plt.plot(best_dnn_model.history['val_accuracy'])

plt.title('Exoplanet DNN Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# Display loss separately 
# Loss is a fundamental measure of how well a machine learning model's predictions match the actual outcomes
# Loss quantifies the difference between pred values and actual values 
# Goal of training a model is to MINIMIZE LOSS
# By minimizing loss we can improve the model's accuracy and predictive power
# Loss guides hyperparamter tuning

plt.figure(figsize=(10,6))

# training loss over epochs
plt.plot(best_dnn_model.history['loss'])

# validation loss over epochs
plt.plot(best_dnn_model.history['val_loss'])

plt.title('Exoplanet DNN Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# DNN Feature Importance 

In [None]:
# get weights from the input layer
weights = dnn_best_model.layers[0].get_weights()[0]

# calculate importance of each feature
feature_importance = np.sum(np.abs(weights), axis=1)

# normalize
feature_importance /= np.sum(feature_importance)

## Make it into a df

In [None]:
feature_names = features.keys()
feature_weights_df = pd.Dataframe({'Feature': feature_names, 'Normalized Weight': feature_importance})

feature_weights_df = feature_weights_df.sort_values(by='Normalized Weight', ascending=False)
feature_weights_df

## Plot features and weights

In [None]:
plt.figure(figsize=(10,8))
plt.barh(feature_weights_df['Feature'], feature_weights_df['Weight'])
plt.xlabel('Feature Weight')
plt.title('Feature Importance by the DNN')
plt.gca().invert_yaxis()
plt.show()

# Save the trained model for future use

In [None]:
# saves model to hdf5 file
# best_dnn_model.save('best_exopldm_model.h5')