In [1]:
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\home laptop\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
from numpy.random import seed
seed(1)

In [4]:
# import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

In [5]:
import pandas as pd
import numpy as np

# Read the CSV and Perform Basic Data Cleaning

In [6]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [7]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_disposition','koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr']]
selected_features = selected_features[selected_features['koi_disposition']!='CANDIDATE']
selected_features['koi_disposition'] = selected_features['koi_disposition'].map({'CONFIRMED': 1, 'CANDIDATE': 2, 'FALSE POSITIVE':3})
selected_features

Unnamed: 0,koi_disposition,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr
0,1,0.586,4.50700,874.8,2.83,443,9.11,25.8
1,3,0.969,1.78220,10829.0,14.60,638,39.30,76.3
2,3,1.276,2.40641,8079.2,33.46,1395,891.96,505.6
3,1,0.701,1.65450,603.3,2.75,1406,926.16,40.9
4,1,0.762,3.14020,686.0,2.77,1160,427.65,40.2
...,...,...,...,...,...,...,...,...
6983,3,2.441,77.98300,1583.8,534.47,1006,242.54,171.5
6986,3,0.765,4.80600,87.7,1.11,929,176.40,8.4
6987,3,1.252,3.22210,1579.2,29.35,2088,4500.53,453.3
6989,3,0.147,0.86500,103.6,1.07,2218,5713.41,12.3


In [8]:
# Read the training data into a dataframe
X = selected_features[['koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr']]
y = selected_features['koi_disposition']

In [9]:
#columns
feature_names = selected_features.columns

# Create a Train Test Split

In [10]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [11]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Read the training data into a dataframe
X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,0.00514,0.014963,0.000122,6e-06,0.020172,4.759054e-07,0.000265
1,0.009245,0.017742,0.0025,5.6e-05,0.057221,1.584646e-05,0.023829
2,0.012618,0.056428,0.00228,0.000163,0.059142,1.788253e-05,0.015312
3,0.008591,0.024841,0.000269,1.1e-05,0.046998,7.879385e-06,0.002408
4,0.001915,0.072343,0.0002,1.3e-05,0.048714,8.944463e-06,0.007192


In [13]:
# Convert the dataframe to a numpy array for Keras
X_train = X_train_scaled_df.values
X_train.shape

(3978, 7)

In [14]:
# Read in the training labels as a dataframe
y_train_df = pd.DataFrame(y_train)
y_train_df.head()

Unnamed: 0,koi_disposition
4608,3
2644,1
1322,3
460,1
2579,1


In [15]:
y_train = y_train_df.values
y_train.shape

(3978, 1)

In [16]:
# One-hot encode the integer labels
y_train = to_categorical(y_train_df)
y_train[:10]
y_train.shape

(3978, 4)

In [17]:
# Read the testing labels as a dataframe
y_test_df = pd.DataFrame(y_test)
y_test.shape

(1326,)

In [18]:
# One-hot encode the integer labels
y_test = to_categorical(y_test_df)
y_test_df.shape

(1326, 1)

# Build a Deep Neural Network

In [19]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=7))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

In [20]:
# The output layer has 2 columns that are one-hot encoded
y_train.shape

(3978, 4)

In [21]:
# Compile the model using categorical_crossentropy for the loss function, the adam optimizer,
# and add accuracy to the training metrics
model.compile(loss="categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               800       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 404       
Total params: 21,404
Trainable params: 21,404
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Use the training data to fit (train) the model
model.fit(
    X_train_scaled,
    y_train,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
125/125 - 0s - loss: 0.7486 - accuracy: 0.6589
Epoch 2/1000
125/125 - 0s - loss: 0.5035 - accuracy: 0.7076
Epoch 3/1000
125/125 - 0s - loss: 0.4629 - accuracy: 0.7544
Epoch 4/1000
125/125 - 0s - loss: 0.4479 - accuracy: 0.7624
Epoch 5/1000
125/125 - 0s - loss: 0.4399 - accuracy: 0.7730
Epoch 6/1000
125/125 - 0s - loss: 0.4284 - accuracy: 0.7838
Epoch 7/1000
125/125 - 0s - loss: 0.4218 - accuracy: 0.7853
Epoch 8/1000
125/125 - 0s - loss: 0.4171 - accuracy: 0.7974
Epoch 9/1000
125/125 - 0s - loss: 0.4186 - accuracy: 0.7914
Epoch 10/1000
125/125 - 0s - loss: 0.4083 - accuracy: 0.8009
Epoch 11/1000
125/125 - 0s - loss: 0.4080 - accuracy: 0.7959
Epoch 12/1000
125/125 - 0s - loss: 0.4068 - accuracy: 0.8049
Epoch 13/1000
125/125 - 0s - loss: 0.4008 - accuracy: 0.8107
Epoch 14/1000
125/125 - 0s - loss: 0.3981 - accuracy: 0.8039
Epoch 15/1000
125/125 - 0s - loss: 0.3926 - accuracy: 0.8120
Epoch 16/1000
125/125 - 0s - loss: 0.3873 - accuracy: 0.8193
Epoch 17/1000
125/125 - 0s - loss

Epoch 135/1000
125/125 - 0s - loss: 0.2386 - accuracy: 0.8949
Epoch 136/1000
125/125 - 0s - loss: 0.2414 - accuracy: 0.8974
Epoch 137/1000
125/125 - 0s - loss: 0.2374 - accuracy: 0.9015
Epoch 138/1000
125/125 - 0s - loss: 0.2465 - accuracy: 0.8949
Epoch 139/1000
125/125 - 0s - loss: 0.2455 - accuracy: 0.8924
Epoch 140/1000
125/125 - 0s - loss: 0.2415 - accuracy: 0.8947
Epoch 141/1000
125/125 - 0s - loss: 0.2502 - accuracy: 0.8914
Epoch 142/1000
125/125 - 0s - loss: 0.2456 - accuracy: 0.8952
Epoch 143/1000
125/125 - 0s - loss: 0.2400 - accuracy: 0.8954
Epoch 144/1000
125/125 - 0s - loss: 0.2399 - accuracy: 0.8964
Epoch 145/1000
125/125 - 0s - loss: 0.2407 - accuracy: 0.8942
Epoch 146/1000
125/125 - 0s - loss: 0.2389 - accuracy: 0.8959
Epoch 147/1000
125/125 - 0s - loss: 0.2408 - accuracy: 0.8989
Epoch 148/1000
125/125 - 0s - loss: 0.2397 - accuracy: 0.8947
Epoch 149/1000
125/125 - 0s - loss: 0.2477 - accuracy: 0.8937
Epoch 150/1000
125/125 - 0s - loss: 0.2419 - accuracy: 0.8969
Epoch 15

125/125 - 0s - loss: 0.2203 - accuracy: 0.9040
Epoch 268/1000
125/125 - 0s - loss: 0.2228 - accuracy: 0.9017
Epoch 269/1000
125/125 - 0s - loss: 0.2213 - accuracy: 0.9040
Epoch 270/1000
125/125 - 0s - loss: 0.2231 - accuracy: 0.9040
Epoch 271/1000
125/125 - 0s - loss: 0.2214 - accuracy: 0.9032
Epoch 272/1000
125/125 - 0s - loss: 0.2220 - accuracy: 0.9030
Epoch 273/1000
125/125 - 0s - loss: 0.2199 - accuracy: 0.9045
Epoch 274/1000
125/125 - 0s - loss: 0.2267 - accuracy: 0.9037
Epoch 275/1000
125/125 - 0s - loss: 0.2181 - accuracy: 0.9050
Epoch 276/1000
125/125 - 0s - loss: 0.2231 - accuracy: 0.9027
Epoch 277/1000
125/125 - 0s - loss: 0.2177 - accuracy: 0.9047
Epoch 278/1000
125/125 - 0s - loss: 0.2211 - accuracy: 0.9017
Epoch 279/1000
125/125 - 0s - loss: 0.2220 - accuracy: 0.9050
Epoch 280/1000
125/125 - 0s - loss: 0.2178 - accuracy: 0.9070
Epoch 281/1000
125/125 - 0s - loss: 0.2148 - accuracy: 0.9067
Epoch 282/1000
125/125 - 0s - loss: 0.2250 - accuracy: 0.9037
Epoch 283/1000
125/125 

Epoch 400/1000
125/125 - 0s - loss: 0.2031 - accuracy: 0.9108
Epoch 401/1000
125/125 - 0s - loss: 0.1998 - accuracy: 0.9135
Epoch 402/1000
125/125 - 0s - loss: 0.2030 - accuracy: 0.9138
Epoch 403/1000
125/125 - 0s - loss: 0.2027 - accuracy: 0.9100
Epoch 404/1000
125/125 - 0s - loss: 0.2005 - accuracy: 0.9103
Epoch 405/1000
125/125 - 0s - loss: 0.2136 - accuracy: 0.9085
Epoch 406/1000
125/125 - 0s - loss: 0.2079 - accuracy: 0.9103
Epoch 407/1000
125/125 - 0s - loss: 0.1963 - accuracy: 0.9113
Epoch 408/1000
125/125 - 0s - loss: 0.2020 - accuracy: 0.9135
Epoch 409/1000
125/125 - 0s - loss: 0.2037 - accuracy: 0.9085
Epoch 410/1000
125/125 - 0s - loss: 0.1974 - accuracy: 0.9133
Epoch 411/1000
125/125 - 0s - loss: 0.2009 - accuracy: 0.9125
Epoch 412/1000
125/125 - 0s - loss: 0.2055 - accuracy: 0.9090
Epoch 413/1000
125/125 - 0s - loss: 0.2119 - accuracy: 0.9057
Epoch 414/1000
125/125 - 0s - loss: 0.2083 - accuracy: 0.9065
Epoch 415/1000
125/125 - 0s - loss: 0.1967 - accuracy: 0.9148
Epoch 41

125/125 - 0s - loss: 0.1849 - accuracy: 0.9213
Epoch 533/1000
125/125 - 0s - loss: 0.1852 - accuracy: 0.9203
Epoch 534/1000
125/125 - 0s - loss: 0.1921 - accuracy: 0.9153
Epoch 535/1000
125/125 - 0s - loss: 0.1860 - accuracy: 0.9170
Epoch 536/1000
125/125 - 0s - loss: 0.1841 - accuracy: 0.9173
Epoch 537/1000
125/125 - 0s - loss: 0.1788 - accuracy: 0.9226
Epoch 538/1000
125/125 - 0s - loss: 0.1836 - accuracy: 0.9180
Epoch 539/1000
125/125 - 0s - loss: 0.1970 - accuracy: 0.9120
Epoch 540/1000
125/125 - 0s - loss: 0.1892 - accuracy: 0.9186
Epoch 541/1000
125/125 - 0s - loss: 0.1944 - accuracy: 0.9108
Epoch 542/1000
125/125 - 0s - loss: 0.1874 - accuracy: 0.9186
Epoch 543/1000
125/125 - 0s - loss: 0.1865 - accuracy: 0.9165
Epoch 544/1000
125/125 - 0s - loss: 0.1875 - accuracy: 0.9198
Epoch 545/1000
125/125 - 0s - loss: 0.1840 - accuracy: 0.9163
Epoch 546/1000
125/125 - 0s - loss: 0.1834 - accuracy: 0.9203
Epoch 547/1000
125/125 - 0s - loss: 0.1827 - accuracy: 0.9201
Epoch 548/1000
125/125 

Epoch 665/1000
125/125 - 0s - loss: 0.1669 - accuracy: 0.9256
Epoch 666/1000
125/125 - 0s - loss: 0.1622 - accuracy: 0.9286
Epoch 667/1000
125/125 - 0s - loss: 0.1741 - accuracy: 0.9216
Epoch 668/1000
125/125 - 0s - loss: 0.1814 - accuracy: 0.9201
Epoch 669/1000
125/125 - 0s - loss: 0.1721 - accuracy: 0.9253
Epoch 670/1000
125/125 - 0s - loss: 0.1623 - accuracy: 0.9261
Epoch 671/1000
125/125 - 0s - loss: 0.1694 - accuracy: 0.9251
Epoch 672/1000
125/125 - 0s - loss: 0.1616 - accuracy: 0.9286
Epoch 673/1000
125/125 - 0s - loss: 0.1885 - accuracy: 0.9145
Epoch 674/1000
125/125 - 0s - loss: 0.1724 - accuracy: 0.9236
Epoch 675/1000
125/125 - 0s - loss: 0.1618 - accuracy: 0.9266
Epoch 676/1000
125/125 - 0s - loss: 0.1672 - accuracy: 0.9268
Epoch 677/1000
125/125 - 0s - loss: 0.1594 - accuracy: 0.9306
Epoch 678/1000
125/125 - 0s - loss: 0.1735 - accuracy: 0.9231
Epoch 679/1000
125/125 - 0s - loss: 0.1673 - accuracy: 0.9266
Epoch 680/1000
125/125 - 0s - loss: 0.1674 - accuracy: 0.9241
Epoch 68

125/125 - 0s - loss: 0.1547 - accuracy: 0.9306
Epoch 798/1000
125/125 - 0s - loss: 0.1495 - accuracy: 0.9324
Epoch 799/1000
125/125 - 0s - loss: 0.1561 - accuracy: 0.9309
Epoch 800/1000
125/125 - 0s - loss: 0.1635 - accuracy: 0.9281
Epoch 801/1000
125/125 - 0s - loss: 0.1528 - accuracy: 0.9324
Epoch 802/1000
125/125 - 0s - loss: 0.1545 - accuracy: 0.9299
Epoch 803/1000
125/125 - 0s - loss: 0.1607 - accuracy: 0.9251
Epoch 804/1000
125/125 - 0s - loss: 0.1624 - accuracy: 0.9268
Epoch 805/1000
125/125 - 0s - loss: 0.1535 - accuracy: 0.9309
Epoch 806/1000
125/125 - 0s - loss: 0.1468 - accuracy: 0.9339
Epoch 807/1000
125/125 - 0s - loss: 0.1458 - accuracy: 0.9354
Epoch 808/1000
125/125 - 0s - loss: 0.1502 - accuracy: 0.9341
Epoch 809/1000
125/125 - 0s - loss: 0.1514 - accuracy: 0.9304
Epoch 810/1000
125/125 - 0s - loss: 0.1440 - accuracy: 0.9392
Epoch 811/1000
125/125 - 0s - loss: 0.1483 - accuracy: 0.9364
Epoch 812/1000
125/125 - 0s - loss: 0.1567 - accuracy: 0.9311
Epoch 813/1000
125/125 

Epoch 930/1000
125/125 - 0s - loss: 0.1422 - accuracy: 0.9364
Epoch 931/1000
125/125 - 0s - loss: 0.1357 - accuracy: 0.9404
Epoch 932/1000
125/125 - 0s - loss: 0.1363 - accuracy: 0.9412
Epoch 933/1000
125/125 - 0s - loss: 0.1542 - accuracy: 0.9361
Epoch 934/1000
125/125 - 0s - loss: 0.1707 - accuracy: 0.9296
Epoch 935/1000
125/125 - 0s - loss: 0.1319 - accuracy: 0.9460
Epoch 936/1000
125/125 - 0s - loss: 0.1383 - accuracy: 0.9392
Epoch 937/1000
125/125 - 0s - loss: 0.1378 - accuracy: 0.9407
Epoch 938/1000
125/125 - 0s - loss: 0.1323 - accuracy: 0.9427
Epoch 939/1000
125/125 - 0s - loss: 0.1602 - accuracy: 0.9291
Epoch 940/1000
125/125 - 0s - loss: 0.1326 - accuracy: 0.9422
Epoch 941/1000
125/125 - 0s - loss: 0.1318 - accuracy: 0.9417
Epoch 942/1000
125/125 - 0s - loss: 0.1337 - accuracy: 0.9409
Epoch 943/1000
125/125 - 0s - loss: 0.1404 - accuracy: 0.9407
Epoch 944/1000
125/125 - 0s - loss: 0.1354 - accuracy: 0.9424
Epoch 945/1000
125/125 - 0s - loss: 0.1338 - accuracy: 0.9387
Epoch 94

<tensorflow.python.keras.callbacks.History at 0x264577bb6c8>

# Save the Model

In [25]:
model.save("DL.h5")

# Evaluate the Model

In [26]:
# Load the model
from tensorflow.keras.models import load_model
model = load_model("DL.h5")

In [27]:
# Evaluate the model using the training data
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

42/42 - 0s - loss: 0.3833 - accuracy: 0.8861
Loss: 0.38327762484550476, Accuracy: 0.8861236572265625


In [28]:
X_train[0]

array([5.13957503e-03, 1.49634010e-02, 1.22097389e-04, 6.13938316e-06,
       2.01715266e-02, 4.75905371e-07, 2.65137706e-04])

In [29]:
X_test_scaled[0]

array([9.56292284e-03, 2.49060527e-02, 8.36256826e-05, 5.78998737e-06,
       8.86449400e-02, 7.91217799e-05, 1.77863211e-03])

In [30]:
# Grab just one data point to test with
test = np.expand_dims(X_test_scaled[0], axis=0)
test.shape

(1, 7)

In [31]:
# Make a prediction. The result should be 5 - STANDING
print(f"Predicted class: {model.predict_classes(test)}")

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Predicted class: [3]
