In [2]:
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\home laptop\anaconda3\lib\site-packages (0.0)


In [3]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
!pip install graphviz



In [5]:
from numpy.random import seed
seed(1)

In [6]:
# import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

In [7]:
import pandas as pd
import numpy as np

# Read the CSV and Perform Basic Data Cleaning

In [8]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [9]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_disposition','koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr', 'koi_slogg']]
selected_features = selected_features[selected_features['koi_disposition']!='CANDIDATE']
selected_features['koi_disposition'] = selected_features['koi_disposition'].map({'CONFIRMED': 1, 'CANDIDATE': 2, 'FALSE POSITIVE':3})
selected_features

Unnamed: 0,koi_disposition,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr
0,1,0.586,4.50700,874.8,2.83,443,9.11,25.8
1,3,0.969,1.78220,10829.0,14.60,638,39.30,76.3
2,3,1.276,2.40641,8079.2,33.46,1395,891.96,505.6
3,1,0.701,1.65450,603.3,2.75,1406,926.16,40.9
4,1,0.762,3.14020,686.0,2.77,1160,427.65,40.2
...,...,...,...,...,...,...,...,...
6983,3,2.441,77.98300,1583.8,534.47,1006,242.54,171.5
6986,3,0.765,4.80600,87.7,1.11,929,176.40,8.4
6987,3,1.252,3.22210,1579.2,29.35,2088,4500.53,453.3
6989,3,0.147,0.86500,103.6,1.07,2218,5713.41,12.3


In [10]:
# Read the training data into a dataframe
X = selected_features[['koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr','koi_slogg']]
y = selected_features['koi_disposition']

In [11]:
#columns
feature_names = selected_features.columns

# Create a Train Test Split

In [12]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [13]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
# Read the training data into a dataframe
X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,0.00514,0.014963,0.000122,6e-06,0.020172,4.759054e-07,0.000265
1,0.009245,0.017742,0.0025,5.6e-05,0.057221,1.584646e-05,0.023829
2,0.012618,0.056428,0.00228,0.000163,0.059142,1.788253e-05,0.015312
3,0.008591,0.024841,0.000269,1.1e-05,0.046998,7.879385e-06,0.002408
4,0.001915,0.072343,0.0002,1.3e-05,0.048714,8.944463e-06,0.007192


In [15]:
# Convert the dataframe to a numpy array for Keras
X_train = X_train_scaled_df.values
X_train.shape

(3978, 7)

In [16]:
# Read in the training labels as a dataframe
y_train_df = pd.DataFrame(y_train)
y_train_df.head()

Unnamed: 0,koi_disposition
4608,3
2644,1
1322,3
460,1
2579,1


In [17]:
y_train = y_train_df.values
y_train.shape

(3978, 1)

In [18]:
# One-hot encode the integer labels
y_train = to_categorical(y_train_df)
y_train[:10]
y_train.shape

(3978, 4)

In [19]:
# Read the testing labels as a dataframe
y_test_df = pd.DataFrame(y_test)
y_test.shape

(1326,)

In [20]:
# One-hot encode the integer labels
y_test = to_categorical(y_test_df)
y_test_df.shape

(1326, 1)

# Build a Deep Neural Network

In [21]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=7))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

In [22]:
# The output layer has 2 columns that are one-hot encoded
y_train.shape

(3978, 4)

In [23]:
# Compile the model using categorical_crossentropy for the loss function, the adam optimizer,
# and add accuracy to the training metrics
model.compile(loss="categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               800       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 404       
Total params: 21,404
Trainable params: 21,404
Non-trainable params: 0
_________________________________________________________________


In [26]:
# from ann_visualizer.visualize import ann_viz
# ann_viz(model, title="My first neural network")

In [30]:
!pip install pydot



In [32]:
check_pydot()

NameError: name 'check_pydot' is not defined

In [33]:
# Visualising the neural network
from keras import models, layers, optimizers, regularizers
from tensorflow.python.keras.engine import network
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
SVG(model_to_dot(model, show_layer_names=False, show_shapes=True).create(prog='dot', format='svg'))

# # Training the model
# model_history = model.fit(X_train,
#                   y_train,
#                   epochs=100,
#                   batch_size=256,
#                   validation_split = 0.1)

InvocationException: GraphViz's executables not found

In [27]:
# Use the training data to fit (train) the model
model.fit(
    X_train_scaled,
    y_train,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
125/125 - 0s - loss: 0.7262 - accuracy: 0.6601
Epoch 2/1000
125/125 - 0s - loss: 0.5004 - accuracy: 0.7149
Epoch 3/1000
125/125 - 0s - loss: 0.4597 - accuracy: 0.7604
Epoch 4/1000
125/125 - 0s - loss: 0.4445 - accuracy: 0.7705
Epoch 5/1000
125/125 - 0s - loss: 0.4305 - accuracy: 0.7818
Epoch 6/1000
125/125 - 0s - loss: 0.4229 - accuracy: 0.7891
Epoch 7/1000
125/125 - 0s - loss: 0.4215 - accuracy: 0.7936
Epoch 8/1000
125/125 - 0s - loss: 0.4136 - accuracy: 0.8019
Epoch 9/1000
125/125 - 0s - loss: 0.4050 - accuracy: 0.7994
Epoch 10/1000
125/125 - 0s - loss: 0.4035 - accuracy: 0.8044
Epoch 11/1000
125/125 - 0s - loss: 0.4032 - accuracy: 0.8059
Epoch 12/1000
125/125 - 0s - loss: 0.3967 - accuracy: 0.8077
Epoch 13/1000
125/125 - 0s - loss: 0.3928 - accuracy: 0.8160
Epoch 14/1000
125/125 - 0s - loss: 0.3919 - accuracy: 0.8150
Epoch 15/1000
125/125 - 0s - loss: 0.3863 - accuracy: 0.8200
Epoch 16/1000
125/125 - 0s - loss: 0.3896 - accuracy: 0.8172
Epoch 17/1000
125/125 - 0s - loss

Epoch 135/1000
125/125 - 0s - loss: 0.2399 - accuracy: 0.9010
Epoch 136/1000
125/125 - 0s - loss: 0.2462 - accuracy: 0.8947
Epoch 137/1000
125/125 - 0s - loss: 0.2412 - accuracy: 0.8949
Epoch 138/1000
125/125 - 0s - loss: 0.2409 - accuracy: 0.8969
Epoch 139/1000
125/125 - 0s - loss: 0.2406 - accuracy: 0.8942
Epoch 140/1000
125/125 - 0s - loss: 0.2400 - accuracy: 0.8954
Epoch 141/1000
125/125 - 0s - loss: 0.2532 - accuracy: 0.8904
Epoch 142/1000
125/125 - 0s - loss: 0.2481 - accuracy: 0.8889
Epoch 143/1000
125/125 - 0s - loss: 0.2578 - accuracy: 0.8844
Epoch 144/1000
125/125 - 0s - loss: 0.2401 - accuracy: 0.8972
Epoch 145/1000
125/125 - 0s - loss: 0.2384 - accuracy: 0.8964
Epoch 146/1000
125/125 - 0s - loss: 0.2523 - accuracy: 0.8927
Epoch 147/1000
125/125 - 0s - loss: 0.2371 - accuracy: 0.8972
Epoch 148/1000
125/125 - 0s - loss: 0.2448 - accuracy: 0.8944
Epoch 149/1000
125/125 - 0s - loss: 0.2404 - accuracy: 0.8954
Epoch 150/1000
125/125 - 0s - loss: 0.2403 - accuracy: 0.8984
Epoch 15

125/125 - 0s - loss: 0.2316 - accuracy: 0.8994
Epoch 268/1000
125/125 - 0s - loss: 0.2182 - accuracy: 0.9032
Epoch 269/1000
125/125 - 0s - loss: 0.2331 - accuracy: 0.8974
Epoch 270/1000
125/125 - 0s - loss: 0.2206 - accuracy: 0.9007
Epoch 271/1000
125/125 - 0s - loss: 0.2170 - accuracy: 0.9047
Epoch 272/1000
125/125 - 0s - loss: 0.2260 - accuracy: 0.9037
Epoch 273/1000
125/125 - 0s - loss: 0.2187 - accuracy: 0.9037
Epoch 274/1000
125/125 - 0s - loss: 0.2160 - accuracy: 0.9070
Epoch 275/1000
125/125 - 0s - loss: 0.2210 - accuracy: 0.9065
Epoch 276/1000
125/125 - 0s - loss: 0.2206 - accuracy: 0.9042
Epoch 277/1000
125/125 - 0s - loss: 0.2285 - accuracy: 0.9057
Epoch 278/1000
125/125 - 0s - loss: 0.2126 - accuracy: 0.9060
Epoch 279/1000
125/125 - 0s - loss: 0.2255 - accuracy: 0.8987
Epoch 280/1000
125/125 - 0s - loss: 0.2118 - accuracy: 0.9067
Epoch 281/1000
125/125 - 0s - loss: 0.2159 - accuracy: 0.9027
Epoch 282/1000
125/125 - 0s - loss: 0.2299 - accuracy: 0.8984
Epoch 283/1000
125/125 

Epoch 400/1000
125/125 - 0s - loss: 0.2016 - accuracy: 0.9108
Epoch 401/1000
125/125 - 0s - loss: 0.2034 - accuracy: 0.9110
Epoch 402/1000
125/125 - 1s - loss: 0.1959 - accuracy: 0.9140
Epoch 403/1000
125/125 - 0s - loss: 0.1960 - accuracy: 0.9128
Epoch 404/1000
125/125 - 0s - loss: 0.2071 - accuracy: 0.9057
Epoch 405/1000
125/125 - 0s - loss: 0.1977 - accuracy: 0.9115
Epoch 406/1000
125/125 - 0s - loss: 0.1973 - accuracy: 0.9173
Epoch 407/1000
125/125 - 0s - loss: 0.1956 - accuracy: 0.9138
Epoch 408/1000
125/125 - 0s - loss: 0.1950 - accuracy: 0.9138
Epoch 409/1000
125/125 - 0s - loss: 0.1965 - accuracy: 0.9120
Epoch 410/1000
125/125 - 0s - loss: 0.2102 - accuracy: 0.9072
Epoch 411/1000
125/125 - 0s - loss: 0.1991 - accuracy: 0.9120
Epoch 412/1000
125/125 - 0s - loss: 0.2005 - accuracy: 0.9123
Epoch 413/1000
125/125 - 0s - loss: 0.1961 - accuracy: 0.9153
Epoch 414/1000
125/125 - 0s - loss: 0.1989 - accuracy: 0.9118
Epoch 415/1000
125/125 - 0s - loss: 0.1964 - accuracy: 0.9150
Epoch 41

Epoch 533/1000
125/125 - 0s - loss: 0.1832 - accuracy: 0.9213
Epoch 534/1000
125/125 - 0s - loss: 0.1863 - accuracy: 0.9165
Epoch 535/1000
125/125 - 0s - loss: 0.1899 - accuracy: 0.9128
Epoch 536/1000
125/125 - 0s - loss: 0.1795 - accuracy: 0.9211
Epoch 537/1000
125/125 - 0s - loss: 0.1774 - accuracy: 0.9188
Epoch 538/1000
125/125 - 0s - loss: 0.1827 - accuracy: 0.9173
Epoch 539/1000
125/125 - 0s - loss: 0.1821 - accuracy: 0.9165
Epoch 540/1000
125/125 - 0s - loss: 0.1818 - accuracy: 0.9191
Epoch 541/1000
125/125 - 0s - loss: 0.1809 - accuracy: 0.9186
Epoch 542/1000
125/125 - 0s - loss: 0.1754 - accuracy: 0.9218
Epoch 543/1000
125/125 - 0s - loss: 0.1815 - accuracy: 0.9208
Epoch 544/1000
125/125 - 0s - loss: 0.1799 - accuracy: 0.9186
Epoch 545/1000
125/125 - 0s - loss: 0.1822 - accuracy: 0.9173
Epoch 546/1000
125/125 - 0s - loss: 0.1862 - accuracy: 0.9175
Epoch 547/1000
125/125 - 0s - loss: 0.1776 - accuracy: 0.9236
Epoch 548/1000
125/125 - 0s - loss: 0.1943 - accuracy: 0.9163
Epoch 54

125/125 - 0s - loss: 0.1609 - accuracy: 0.9284
Epoch 666/1000
125/125 - 0s - loss: 0.1615 - accuracy: 0.9301
Epoch 667/1000
125/125 - 0s - loss: 0.1718 - accuracy: 0.9241
Epoch 668/1000
125/125 - 0s - loss: 0.1598 - accuracy: 0.9304
Epoch 669/1000
125/125 - 0s - loss: 0.1621 - accuracy: 0.9296
Epoch 670/1000
125/125 - 0s - loss: 0.1645 - accuracy: 0.9266
Epoch 671/1000
125/125 - 0s - loss: 0.1768 - accuracy: 0.9233
Epoch 672/1000
125/125 - 0s - loss: 0.1734 - accuracy: 0.9248
Epoch 673/1000
125/125 - 0s - loss: 0.1649 - accuracy: 0.9248
Epoch 674/1000
125/125 - 0s - loss: 0.1687 - accuracy: 0.9263
Epoch 675/1000
125/125 - 0s - loss: 0.1728 - accuracy: 0.9253
Epoch 676/1000
125/125 - 0s - loss: 0.1739 - accuracy: 0.9253
Epoch 677/1000
125/125 - 0s - loss: 0.1802 - accuracy: 0.9198
Epoch 678/1000
125/125 - 0s - loss: 0.1710 - accuracy: 0.9253
Epoch 679/1000
125/125 - 0s - loss: 0.1692 - accuracy: 0.9236
Epoch 680/1000
125/125 - 0s - loss: 0.1722 - accuracy: 0.9251
Epoch 681/1000
125/125 

Epoch 798/1000
125/125 - 0s - loss: 0.1673 - accuracy: 0.9261
Epoch 799/1000
125/125 - 0s - loss: 0.1544 - accuracy: 0.9296
Epoch 800/1000
125/125 - 0s - loss: 0.1543 - accuracy: 0.9349
Epoch 801/1000
125/125 - 0s - loss: 0.1451 - accuracy: 0.9379
Epoch 802/1000
125/125 - 0s - loss: 0.1463 - accuracy: 0.9379
Epoch 803/1000
125/125 - 0s - loss: 0.1488 - accuracy: 0.9351
Epoch 804/1000
125/125 - 0s - loss: 0.1465 - accuracy: 0.9354
Epoch 805/1000
125/125 - 0s - loss: 0.1524 - accuracy: 0.9326
Epoch 806/1000
125/125 - 0s - loss: 0.1621 - accuracy: 0.9266
Epoch 807/1000
125/125 - 0s - loss: 0.1477 - accuracy: 0.9346
Epoch 808/1000
125/125 - 0s - loss: 0.1518 - accuracy: 0.9344
Epoch 809/1000
125/125 - 0s - loss: 0.1427 - accuracy: 0.9367
Epoch 810/1000
125/125 - 0s - loss: 0.1503 - accuracy: 0.9316
Epoch 811/1000
125/125 - 0s - loss: 0.1541 - accuracy: 0.9294
Epoch 812/1000
125/125 - 0s - loss: 0.1513 - accuracy: 0.9299
Epoch 813/1000
125/125 - 0s - loss: 0.1450 - accuracy: 0.9361
Epoch 81

125/125 - 0s - loss: 0.1318 - accuracy: 0.9424
Epoch 931/1000
125/125 - 0s - loss: 0.1404 - accuracy: 0.9361
Epoch 932/1000
125/125 - 0s - loss: 0.1478 - accuracy: 0.9311
Epoch 933/1000
125/125 - 0s - loss: 0.1358 - accuracy: 0.9382
Epoch 934/1000
125/125 - 0s - loss: 0.1328 - accuracy: 0.9447
Epoch 935/1000
125/125 - 0s - loss: 0.1361 - accuracy: 0.9409
Epoch 936/1000
125/125 - 0s - loss: 0.1317 - accuracy: 0.9384
Epoch 937/1000
125/125 - 0s - loss: 0.1349 - accuracy: 0.9404
Epoch 938/1000
125/125 - 0s - loss: 0.1464 - accuracy: 0.9344
Epoch 939/1000
125/125 - 0s - loss: 0.1283 - accuracy: 0.9462
Epoch 940/1000
125/125 - 0s - loss: 0.1302 - accuracy: 0.9429
Epoch 941/1000
125/125 - 1s - loss: 0.1384 - accuracy: 0.9402
Epoch 942/1000
125/125 - 0s - loss: 0.1331 - accuracy: 0.9419
Epoch 943/1000
125/125 - 0s - loss: 0.1315 - accuracy: 0.9442
Epoch 944/1000
125/125 - 0s - loss: 0.1302 - accuracy: 0.9412
Epoch 945/1000
125/125 - 0s - loss: 0.1287 - accuracy: 0.9457
Epoch 946/1000
125/125 

<tensorflow.python.keras.callbacks.History at 0x216de80e4c8>

# Save the Model

In [None]:
model.save("DL.h5")

# Evaluate the Model

In [None]:
# Load the model
from tensorflow.keras.models import load_model
model = load_model("DL.h5")

In [None]:
# Evaluate the model using the training data
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
X_train[0]

In [None]:
X_test_scaled[0]

In [None]:
# Grab just one data point to test with
test = np.expand_dims(X_test_scaled[0], axis=0)
test.shape

In [None]:
# Make a prediction. The result should be 5 - STANDING
print(f"Predicted class: {model.predict_classes(test)}")