# Neural Network Model

In [1]:
import pandas as pd

## The Dataset

In [2]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()

## Data Pre-Processing

In [3]:
target = df['koi_disposition']
target_names = ['Confirmed', 'False Positive', 'Candidate']
df = df.drop("koi_disposition", axis=1)

In [4]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period','koi_time0bk', 'koi_impact',
        'koi_duration', 'koi_depth', 'koi_prad', 'koi_teq', 'koi_insol', 
       'koi_model_snr', 'koi_steff', 'koi_slogg', 'koi_srad', 'ra', 'dec',
       'koi_kepmag']]

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=1)

In [6]:
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler().fit(X_train)

In [7]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [9]:
from keras.utils import to_categorical

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

Using TensorFlow backend.


## Create a Deep Learning Model

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Create a Neural Network model here
deep_model = Sequential()
deep_model.add(Dense(units=6, activation='relu', input_dim=19))
#deep_model.add(Dense(units=6, activation='relu'))
deep_model.add(Dense(units=3, activation='softmax'))

In [11]:
# Compile and fit the model
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 5243 samples
Epoch 1/100
5243/5243 - 1s - loss: 0.9632 - accuracy: 0.5184
Epoch 2/100
5243/5243 - 0s - loss: 0.7140 - accuracy: 0.6784
Epoch 3/100
5243/5243 - 0s - loss: 0.5644 - accuracy: 0.7318
Epoch 4/100
5243/5243 - 0s - loss: 0.4809 - accuracy: 0.7448
Epoch 5/100
5243/5243 - 0s - loss: 0.4437 - accuracy: 0.7471
Epoch 6/100
5243/5243 - 0s - loss: 0.4256 - accuracy: 0.7694
Epoch 7/100
5243/5243 - 0s - loss: 0.4148 - accuracy: 0.7829
Epoch 8/100
5243/5243 - 0s - loss: 0.4072 - accuracy: 0.7971
Epoch 9/100
5243/5243 - 0s - loss: 0.4008 - accuracy: 0.7942
Epoch 10/100
5243/5243 - 0s - loss: 0.3948 - accuracy: 0.7957
Epoch 11/100
5243/5243 - 0s - loss: 0.3888 - accuracy: 0.8034
Epoch 12/100
5243/5243 - 0s - loss: 0.3828 - accuracy: 0.8091
Epoch 13/100
5243/5243 - 0s - loss: 0.3782 - accuracy: 0.8138
Epoch 14/100
5243/5243 - 0s - loss: 0.3736 - accuracy: 0.8133
Epoch 15/100
5243/5243 - 0s - loss: 0.3703 - accuracy: 0.8184
Epoch 16/100
5243/5243 - 0s - loss: 0.3673 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1c47c306c50>

## Quantify our Trained Model

In [12]:
model_loss, model_accuracy = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1748 - 0s - loss: 0.3290 - accuracy: 0.8473
Normal Neural Network - Loss: 0.32898509495591133, Accuracy: 0.8472539782524109


In [13]:
# Use the first 5 test data values to make a prediction and compare it to the actual labels
predictions = deep_model.predict_classes(X_test_scaled[:5])


print(f"Actual Labels: {list(y_test[:5])}, Predicted labels: {predictions}")

Actual Labels: ['CONFIRMED', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CONFIRMED', 'FALSE POSITIVE'], Predicted labels: [0 2 2 1 2]
