In [1]:
# Update sklearn to prevent version mismatches
#!pip install sklearn --upgrade

In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd
from numpy.random import seed
seed(87)

# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [5]:
#set all columns to be displayed
pd.set_option('display.max_columns', None)

# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.
X = df.drop("koi_disposition", axis=1)
y = df["koi_disposition"]
print(X.shape, y.shape)

(6991, 40) (6991,)


In [7]:
df.koi_disposition.unique()

array(['CONFIRMED', 'FALSE POSITIVE', 'CANDIDATE'], dtype=object)

# Create a Train Test Split

Use `koi_disposition` for the y values

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
 X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [10]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,koi_impact,koi_impact_err1,koi_impact_err2,koi_duration,koi_duration_err1,koi_duration_err2,koi_depth,koi_depth_err1,koi_depth_err2,koi_prad,koi_prad_err1,koi_prad_err2,koi_teq,koi_insol,koi_insol_err1,koi_insol_err2,koi_model_snr,koi_tce_plnt_num,koi_steff,koi_steff_err1,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
3563,0,0,0,0,10.548413,5.47e-05,-5.47e-05,139.06402,0.00411,-0.00411,1.017,0.047,-0.786,1.872,0.143,-0.143,102.9,10.4,-10.4,3.89,0.65,-0.34,899,154.84,72.78,-37.23,11.7,2,6047,120,-133,4.387,0.066,-0.123,1.092,0.181,-0.097,298.09543,44.737061,13.204
4099,0,0,0,0,24.754385,0.0001365,-0.0001365,140.20732,0.00446,-0.00446,0.709,0.023,-0.516,3.39,0.153,-0.153,593.3,38.0,-38.0,2.1,0.14,-0.2,491,13.7,3.92,-3.75,18.0,1,4852,144,-144,4.519,0.078,-0.052,0.804,0.056,-0.076,295.73535,42.576248,15.514
5460,0,0,0,0,1.057336,1.23e-07,-1.23e-07,131.792007,9.6e-05,-9.6e-05,0.262,0.274,-0.113,1.5795,0.0152,-0.0152,47337.0,120.0,-120.0,14.59,1.15,-1.28,1276,623.51,184.18,-164.28,476.0,1,4664,126,-140,4.594,0.054,-0.027,0.683,0.054,-0.06,292.18417,49.31004,15.414
1091,0,0,0,0,201.118319,0.001461,-0.001461,187.56986,0.00529,-0.00529,0.001,0.417,-0.001,10.328,0.165,-0.165,584.8,19.2,-19.2,2.28,0.32,-0.2,300,1.92,0.77,-0.44,34.7,1,5646,101,-112,4.447,0.072,-0.108,0.954,0.135,-0.083,283.11377,48.13139,13.328
5999,0,0,0,0,91.649983,0.003181,-0.003181,175.7156,0.0286,-0.0286,0.2136,0.2282,-0.2135,10.294,0.939,-0.939,193.6,23.9,-23.9,2.27,1.27,-0.54,568,24.57,41.53,-12.19,8.7,2,6705,164,-233,4.145,0.164,-0.164,1.608,0.905,-0.383,294.93198,39.81242,12.964


In [11]:
y_train.head()

3563    CANDIDATE
4099    CONFIRMED
5460    CANDIDATE
1091    CONFIRMED
5999    CANDIDATE
Name: koi_disposition, dtype: object

In [12]:
y_test.head()

1213         CONFIRMED
5220    FALSE POSITIVE
6688    FALSE POSITIVE
2031         CONFIRMED
5824    FALSE POSITIVE
Name: koi_disposition, dtype: object

# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [13]:
# Scale your data
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
len(X_train_scaled[0])

40

In [15]:
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)


In [16]:
label_encoder.fit(y_test)
encoded_y_test = label_encoder.transform(y_test)

In [17]:
 from tensorflow.keras.utils import to_categorical

In [18]:
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
y_train_categorical

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

# Train the Model



In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=20, activation='relu', input_dim=40))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

2022-05-15 13:45:13.411025: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 20)                820       
                                                                 
 dense_1 (Dense)             (None, 20)                420       
                                                                 
 dense_2 (Dense)             (None, 3)                 63        
                                                                 
Total params: 1,303
Trainable params: 1,303
Non-trainable params: 0
_________________________________________________________________


In [21]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [22]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=50,
    shuffle=True,
    verbose=2
)

Epoch 1/50
164/164 - 1s - loss: 0.8311 - accuracy: 0.6241 - 545ms/epoch - 3ms/step
Epoch 2/50
164/164 - 0s - loss: 0.4522 - accuracy: 0.7841 - 191ms/epoch - 1ms/step
Epoch 3/50
164/164 - 0s - loss: 0.3935 - accuracy: 0.8056 - 196ms/epoch - 1ms/step
Epoch 4/50
164/164 - 0s - loss: 0.3783 - accuracy: 0.8089 - 184ms/epoch - 1ms/step
Epoch 5/50
164/164 - 0s - loss: 0.3648 - accuracy: 0.8167 - 200ms/epoch - 1ms/step
Epoch 6/50
164/164 - 0s - loss: 0.3578 - accuracy: 0.8209 - 186ms/epoch - 1ms/step
Epoch 7/50
164/164 - 0s - loss: 0.3525 - accuracy: 0.8276 - 182ms/epoch - 1ms/step
Epoch 8/50
164/164 - 0s - loss: 0.3474 - accuracy: 0.8299 - 296ms/epoch - 2ms/step
Epoch 9/50
164/164 - 0s - loss: 0.3425 - accuracy: 0.8320 - 218ms/epoch - 1ms/step
Epoch 10/50
164/164 - 0s - loss: 0.3396 - accuracy: 0.8405 - 179ms/epoch - 1ms/step
Epoch 11/50
164/164 - 0s - loss: 0.3362 - accuracy: 0.8383 - 184ms/epoch - 1ms/step
Epoch 12/50
164/164 - 0s - loss: 0.3316 - accuracy: 0.8419 - 186ms/epoch - 1ms/step
E

<keras.callbacks.History at 0x12dd70c10>

In [23]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

55/55 - 0s - loss: 0.2614 - accuracy: 0.8890 - 174ms/epoch - 3ms/step
Normal Neural Network - Loss: 0.26135367155075073, Accuracy: 0.8890160322189331


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [28]:

# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=5,
        step=2), activation=activation, input_dim=40))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=5,
            step=2),
            activation=activation))
    
    nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn    

In [25]:
# Import the kerastuner library

import keras_tuner as kt
import tensorflow as tf

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Oracle from existing project ./untitled_project/oracle.json
INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json


In [26]:
# Train the model with GridSearch
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,encoded_y_train,epochs=20,validation_data=(X_test_scaled,encoded_y_test))

Trial 60 Complete [00h 00m 11s]
val_accuracy: 0.3632723093032837

Best val_accuracy So Far: 0.3707093894481659
Total elapsed time: 00h 04m 21s
INFO:tensorflow:Oracle triggered exit


# Save the Model

In [None]:
# the work with neural network will not be saved as the Best val_accuracy So Far: 0.3707093894481659
#neural network may not be the best model to process this assignment data