In [1]:
# Data manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Pre-processing and setup functions
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.feature_selection import mutual_info_classif

# PCA Decomposition
from sklearn.decomposition import PCA

# Neural Network
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Show up to 50 columns when a DataFrame is displayed
pd.options.display.max_columns = 50

## Import Data

In [2]:
# Load the transformed feature table from CSV
features_path = 'transformed_features.csv'
exo2 = pd.read_csv(features_path)

In [3]:
# Remove the leftover 'Unnamed: 0' column (presumably a row index written out
# when the CSV was saved). errors='ignore' keeps this cell idempotent: because
# it reassigns `exo2` from itself, re-running it after the column is already
# gone would otherwise raise a KeyError.
exo2 = exo2.drop(columns=['Unnamed: 0'], errors='ignore')

In [4]:
# Eyeball ten randomly chosen rows as a sanity check on the loaded data
exo2.sample(n=10)

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,log_koi_period,log_koi_time0bk,log_koi_impact,log_koi_duration,log_koi_depth,log_koi_prad,log_koi_teq,log_koi_insol,log_koi_model_snr,log_koi_tce_plnt_num,log_koi_steff,log_koi_slogg,log_koi_srad,log_ra,log_dec,log_koi_kepmag
6577,FALSE POSITIVE,0,0,1,0,0.397097,-0.539393,0.755292,-0.116783,0.213857,0.008667,-0.512965,-0.513844,-0.198399,-0.40433,0.004567,0.318777,-0.376245,0.122833,0.521041,0.939
3480,FALSE POSITIVE,0,1,1,1,0.211626,-0.508361,0.720124,0.820591,0.698258,0.365674,-0.065749,-0.066413,0.718376,-0.40433,0.61505,0.011009,-0.142215,1.450116,-0.607809,-0.206421
1643,CONFIRMED,0,0,0,0,0.132028,-0.511217,0.503299,-0.656506,0.002098,-0.368264,-0.365373,-0.366184,-0.367394,-0.40433,-0.118345,0.420906,-0.526666,0.51931,0.796726,1.142203
263,FALSE POSITIVE,0,0,1,1,-0.904881,-0.561859,0.539424,-0.201031,-0.796466,-0.817225,0.866673,0.867821,-0.259994,-0.40433,0.719133,0.213541,-0.203192,0.780768,1.066723,-0.01076
3981,FALSE POSITIVE,0,1,1,0,-1.3045,-0.598237,0.466383,-0.184956,-0.694251,0.075058,1.916203,1.914891,0.496362,-0.40433,-0.060384,-1.293113,1.989463,0.77091,-0.357185,-1.315519
6113,FALSE POSITIVE,1,0,0,0,1.911527,1.659259,0.194985,0.256219,-0.50624,0.258842,-0.247188,-0.246583,-0.903485,-0.40433,3.650065,-1.045151,2.130936,-0.343501,0.356956,-0.898423
600,FALSE POSITIVE,0,1,0,0,-1.180346,-0.556888,0.856746,-0.494955,1.720603,1.465848,1.111487,1.109784,1.178931,-0.40433,0.667284,0.209237,-0.157632,-0.630178,1.989551,-0.738329
6486,FALSE POSITIVE,0,0,1,1,-1.326219,-0.60909,-1.136573,-0.831646,-1.264792,-0.849032,1.638396,1.637997,-0.979485,-0.40433,1.027809,-0.295371,0.720635,0.037069,-0.800308,-0.206421
6834,FALSE POSITIVE,0,1,0,0,-1.080113,-0.615324,0.075162,-0.896254,-1.264792,-1.13473,1.066023,1.064865,-0.87806,-0.40433,0.799526,0.204931,-0.055582,1.024975,1.604541,-0.097872
6547,FALSE POSITIVE,0,0,1,1,-0.695288,-0.583675,0.899529,0.569301,-0.529596,1.645698,1.354044,1.354487,0.056414,-0.40433,0.552081,-1.020707,1.597275,1.466168,-0.59314,-0.746612


## Transform and Encode

In [5]:
# Fixed seed so the train/test partition (and therefore every downstream
# metric) is reproducible after Restart Kernel -> Run All
RANDOM_STATE = 42

# Separate the target column (y) from the feature columns (X).
# Features are cast to float64 up front: the 0/1 flag columns are integers,
# and MinMaxScaler converts everything to float anyway (older scikit-learn
# versions emit a dtype-conversion warning when it has to do so itself).
y = exo2['koi_disposition']
X = exo2.drop(columns=['koi_disposition']).astype('float64')

# Split into train and test sets; stratifying on y keeps the class
# proportions the same in both subsets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=RANDOM_STATE
)

# Scale every feature to the [0, 1] range using the min/max of the TRAINING
# data only (no test-set leakage). Test values outside the training range
# can therefore land slightly outside [0, 1].
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Turn the string labels into integer class ids ...
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# ... and then into one-hot vectors, as required by categorical cross-entropy
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

  return self.partial_fit(X, y)


## Build Model

In [6]:
# (n_samples, n_features) of the scaled training matrix
np.shape(X_train_scaled)

(5243, 20)

In [7]:
# Derive the input and output widths from the prepared arrays instead of
# hard-coding 20 and 3, so the network stays consistent if the feature set
# or the set of labels changes.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

# L2 weight-decay factor applied to every hidden layer's kernel
L2_PENALTY = 0.001

model = Sequential()

# Hidden layer 1 (also declares the input width)
model.add(Dense(
    units=50,
    activation='relu',
    kernel_regularizer=keras.regularizers.l2(L2_PENALTY),
    input_dim=n_features
))

# Hidden layer 2
model.add(Dense(
    units=50,
    activation='relu',
    kernel_regularizer=keras.regularizers.l2(L2_PENALTY)
))

# Hidden layer 3
model.add(Dense(
    units=20,
    activation='relu',
    kernel_regularizer=keras.regularizers.l2(L2_PENALTY)
))

# Output layer: one softmax probability per class
model.add(Dense(
    units=n_classes,
    activation='softmax'
))

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
# Print each layer's output shape and parameter count to verify the architecture
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                1050      
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 20)                1020      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 63        
Total params: 4,683
Trainable params: 4,683
Non-trainable params: 0
_________________________________________________________________


## Compile and Fit the Model

In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (targets are one-hot encoded), and accuracy as the reported metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train on the scaled training set for 50 epochs, shuffling the samples
# each epoch and logging per-epoch progress
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=50,
    shuffle=True,
    verbose=1
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x28b3653a1d0>

## Model Accuracy

In [10]:
# Score the trained network on the held-out test set; with the compiled
# metrics, evaluate() returns [loss, accuracy]
test_scores = model.evaluate(X_test_scaled, y_test_categorical, verbose=0)
model_loss, model_accuracy = test_scores
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.2892514545666544, Accuracy: 0.9050343036651611


In [11]:
# Predict class ids for the first ten test rows.
# `Sequential.predict_classes` is deprecated and was removed in TensorFlow 2.6;
# taking the argmax over the softmax probabilities is the documented
# replacement and yields the same integer class ids.
class_probabilities = model.predict(X_test_scaled[:10])
encoded_predictions = np.argmax(class_probabilities, axis=-1)

# Map the integer ids back to their original string labels
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

print(f'Predicted classes: {prediction_labels}')
# .iloc makes the positional slice explicit (y_test keeps the original,
# shuffled row index after the split)
print(f'Actual labels: {list(y_test.iloc[:10])}')

Predicted classes: ['FALSE POSITIVE' 'FALSE POSITIVE' 'CONFIRMED' 'CONFIRMED' 'CANDIDATE'
 'CANDIDATE' 'CONFIRMED' 'CONFIRMED' 'FALSE POSITIVE' 'CONFIRMED']
Actual labels: ['FALSE POSITIVE', 'FALSE POSITIVE', 'CONFIRMED', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'FALSE POSITIVE', 'CONFIRMED']


In [12]:
# Persist the trained network to 'Deep_NN.h5' so it can be reloaded later
# without retraining
model.save('Deep_NN.h5')