In [1]:
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\home laptop\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
from numpy.random import seed
seed(1)

In [4]:
# import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

In [5]:
import pandas as pd
import numpy as np

# Read the CSV and Perform Basic Data Cleaning

In [6]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [7]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_disposition','koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr','koi_slogg']]
selected_features = selected_features[selected_features['koi_disposition']!='CANDIDATE']
selected_features['koi_disposition'] = selected_features['koi_disposition'].map({'CONFIRMED': 1, 'CANDIDATE': 2, 'FALSE POSITIVE':3})
selected_features

Unnamed: 0,koi_disposition,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_slogg
0,1,0.586,4.50700,874.8,2.83,443,9.11,25.8,4.467
1,3,0.969,1.78220,10829.0,14.60,638,39.30,76.3,4.544
2,3,1.276,2.40641,8079.2,33.46,1395,891.96,505.6,4.564
3,1,0.701,1.65450,603.3,2.75,1406,926.16,40.9,4.438
4,1,0.762,3.14020,686.0,2.77,1160,427.65,40.2,4.486
...,...,...,...,...,...,...,...,...,...
6983,3,2.441,77.98300,1583.8,534.47,1006,242.54,171.5,3.508
6986,3,0.765,4.80600,87.7,1.11,929,176.40,8.4,4.296
6987,3,1.252,3.22210,1579.2,29.35,2088,4500.53,453.3,4.529
6989,3,0.147,0.86500,103.6,1.07,2218,5713.41,12.3,4.447


In [8]:
# Read the training data into a dataframe
X = selected_features[['koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr','koi_slogg']]
y = selected_features['koi_disposition']

In [9]:
#columns
feature_names = selected_features.columns[1:9]
target = selected_features.columns[:1]
target_names = ["CONFIRMED", "FALSE POSITIVE"]
print(feature_names,target_names)

Index(['koi_impact', 'koi_duration', 'koi_depth', 'koi_prad', 'koi_teq',
       'koi_insol', 'koi_model_snr', 'koi_slogg'],
      dtype='object') ['CONFIRMED', 'FALSE POSITIVE']


# Create a Train Test Split

In [10]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [11]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Read the training data into a dataframe
X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.00514,0.014963,0.000122,6e-06,0.020172,4.759054e-07,0.000265,0.84284
1,0.009245,0.017742,0.0025,5.6e-05,0.057221,1.584646e-05,0.023829,0.787595
2,0.012618,0.056428,0.00228,0.000163,0.059142,1.788253e-05,0.015312,0.830874
3,0.008591,0.024841,0.000269,1.1e-05,0.046998,7.879385e-06,0.002408,0.818309
4,0.001915,0.072343,0.0002,1.3e-05,0.048714,8.944463e-06,0.007192,0.783207


In [13]:
# Convert the dataframe to a numpy array for Keras
X_train = X_train_scaled_df.values
X_train.shape

(3978, 8)

In [14]:
# Read in the training labels as a dataframe
y_train_df = pd.DataFrame(y_train)
y_train_df.head()

Unnamed: 0,koi_disposition
4608,3
2644,1
1322,3
460,1
2579,1


In [15]:
y_train = y_train_df.values
y_train.shape

(3978, 1)

In [16]:
# One-hot encode the integer labels
y_train = to_categorical(y_train_df)
y_train[:10]
y_train.shape

(3978, 4)

In [17]:
# Read the testing labels as a dataframe
y_test_df = pd.DataFrame(y_test)
y_test.shape

(1326,)

In [18]:
# One-hot encode the integer labels
y_test = to_categorical(y_test_df)
y_test_df.shape

(1326, 1)

# Build a Deep Neural Network

In [19]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=8))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

In [20]:
# The output layer has 2 columns that are one-hot encoded
y_train.shape

(3978, 4)

In [21]:
# Compile the model using categorical_crossentropy for the loss function, the adam optimizer,
# and add accuracy to the training metrics
model.compile(loss="categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               900       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 404       
Total params: 21,504
Trainable params: 21,504
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Use the training data to fit (train) the model
model.fit(
    X_train_scaled,
    y_train,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
125/125 - 0s - loss: 0.7135 - accuracy: 0.6468
Epoch 2/1000
125/125 - 0s - loss: 0.5134 - accuracy: 0.7129
Epoch 3/1000
125/125 - 0s - loss: 0.4953 - accuracy: 0.7303
Epoch 4/1000
125/125 - 0s - loss: 0.4801 - accuracy: 0.7489
Epoch 5/1000
125/125 - 0s - loss: 0.4670 - accuracy: 0.7514
Epoch 6/1000
125/125 - 0s - loss: 0.4594 - accuracy: 0.7562
Epoch 7/1000
125/125 - 0s - loss: 0.4498 - accuracy: 0.7685
Epoch 8/1000
125/125 - 0s - loss: 0.4452 - accuracy: 0.7700
Epoch 9/1000
125/125 - 0s - loss: 0.4325 - accuracy: 0.7823
Epoch 10/1000
125/125 - 0s - loss: 0.4240 - accuracy: 0.7891
Epoch 11/1000
125/125 - 0s - loss: 0.4327 - accuracy: 0.7730
Epoch 12/1000
125/125 - 0s - loss: 0.4205 - accuracy: 0.7876
Epoch 13/1000
125/125 - 0s - loss: 0.4181 - accuracy: 0.7903
Epoch 14/1000
125/125 - 0s - loss: 0.4106 - accuracy: 0.8022
Epoch 15/1000
125/125 - 0s - loss: 0.3982 - accuracy: 0.8079
Epoch 16/1000
125/125 - 0s - loss: 0.4085 - accuracy: 0.8012
Epoch 17/1000
125/125 - 0s - loss

Epoch 135/1000
125/125 - 0s - loss: 0.2147 - accuracy: 0.9118
Epoch 136/1000
125/125 - 0s - loss: 0.2166 - accuracy: 0.9082
Epoch 137/1000
125/125 - 0s - loss: 0.2129 - accuracy: 0.9075
Epoch 138/1000
125/125 - 0s - loss: 0.2082 - accuracy: 0.9130
Epoch 139/1000
125/125 - 0s - loss: 0.2075 - accuracy: 0.9163
Epoch 140/1000
125/125 - 0s - loss: 0.2095 - accuracy: 0.9168
Epoch 141/1000
125/125 - 0s - loss: 0.2203 - accuracy: 0.9077
Epoch 142/1000
125/125 - 0s - loss: 0.2111 - accuracy: 0.9108
Epoch 143/1000
125/125 - 0s - loss: 0.2226 - accuracy: 0.9072
Epoch 144/1000
125/125 - 0s - loss: 0.2144 - accuracy: 0.9090
Epoch 145/1000
125/125 - 0s - loss: 0.2154 - accuracy: 0.9085
Epoch 146/1000
125/125 - 0s - loss: 0.2114 - accuracy: 0.9098
Epoch 147/1000
125/125 - 0s - loss: 0.2010 - accuracy: 0.9208
Epoch 148/1000
125/125 - 0s - loss: 0.2093 - accuracy: 0.9120
Epoch 149/1000
125/125 - 0s - loss: 0.2193 - accuracy: 0.9085
Epoch 150/1000
125/125 - 0s - loss: 0.2026 - accuracy: 0.9130
Epoch 15

125/125 - 0s - loss: 0.1855 - accuracy: 0.9218
Epoch 268/1000
125/125 - 0s - loss: 0.1814 - accuracy: 0.9221
Epoch 269/1000
125/125 - 0s - loss: 0.1802 - accuracy: 0.9251
Epoch 270/1000
125/125 - 0s - loss: 0.1844 - accuracy: 0.9221
Epoch 271/1000
125/125 - 0s - loss: 0.1720 - accuracy: 0.9279
Epoch 272/1000
125/125 - 0s - loss: 0.1785 - accuracy: 0.9241
Epoch 273/1000
125/125 - 0s - loss: 0.1779 - accuracy: 0.9243
Epoch 274/1000
125/125 - 0s - loss: 0.1788 - accuracy: 0.9251
Epoch 275/1000
125/125 - 0s - loss: 0.1859 - accuracy: 0.9233
Epoch 276/1000
125/125 - 0s - loss: 0.1795 - accuracy: 0.9241
Epoch 277/1000
125/125 - 0s - loss: 0.1925 - accuracy: 0.9158
Epoch 278/1000
125/125 - 0s - loss: 0.1743 - accuracy: 0.9261
Epoch 279/1000
125/125 - 0s - loss: 0.1719 - accuracy: 0.9263
Epoch 280/1000
125/125 - 0s - loss: 0.1829 - accuracy: 0.9238
Epoch 281/1000
125/125 - 0s - loss: 0.1776 - accuracy: 0.9284
Epoch 282/1000
125/125 - 0s - loss: 0.1724 - accuracy: 0.9266
Epoch 283/1000
125/125 

Epoch 400/1000
125/125 - 0s - loss: 0.1518 - accuracy: 0.9364
Epoch 401/1000
125/125 - 0s - loss: 0.1537 - accuracy: 0.9374
Epoch 402/1000
125/125 - 0s - loss: 0.1598 - accuracy: 0.9301
Epoch 403/1000
125/125 - 0s - loss: 0.1610 - accuracy: 0.9349
Epoch 404/1000
125/125 - 0s - loss: 0.1550 - accuracy: 0.9336
Epoch 405/1000
125/125 - 0s - loss: 0.1509 - accuracy: 0.9384
Epoch 406/1000
125/125 - 0s - loss: 0.1511 - accuracy: 0.9349
Epoch 407/1000
125/125 - 0s - loss: 0.1698 - accuracy: 0.9296
Epoch 408/1000
125/125 - 0s - loss: 0.1528 - accuracy: 0.9372
Epoch 409/1000
125/125 - 0s - loss: 0.1571 - accuracy: 0.9319
Epoch 410/1000
125/125 - 0s - loss: 0.1593 - accuracy: 0.9329
Epoch 411/1000
125/125 - 0s - loss: 0.1455 - accuracy: 0.9414
Epoch 412/1000
125/125 - 0s - loss: 0.1469 - accuracy: 0.9417
Epoch 413/1000
125/125 - 0s - loss: 0.1606 - accuracy: 0.9324
Epoch 414/1000
125/125 - 0s - loss: 0.1552 - accuracy: 0.9349
Epoch 415/1000
125/125 - 0s - loss: 0.1566 - accuracy: 0.9334
Epoch 41

125/125 - 0s - loss: 0.1367 - accuracy: 0.9449
Epoch 533/1000
125/125 - 0s - loss: 0.1446 - accuracy: 0.9382
Epoch 534/1000
125/125 - 0s - loss: 0.1268 - accuracy: 0.9462
Epoch 535/1000
125/125 - 0s - loss: 0.1351 - accuracy: 0.9417
Epoch 536/1000
125/125 - 0s - loss: 0.1296 - accuracy: 0.9437
Epoch 537/1000
125/125 - 0s - loss: 0.1374 - accuracy: 0.9444
Epoch 538/1000
125/125 - 0s - loss: 0.1320 - accuracy: 0.9462
Epoch 539/1000
125/125 - 0s - loss: 0.1302 - accuracy: 0.9442
Epoch 540/1000
125/125 - 0s - loss: 0.1426 - accuracy: 0.9404
Epoch 541/1000
125/125 - 0s - loss: 0.1290 - accuracy: 0.9482
Epoch 542/1000
125/125 - 0s - loss: 0.1306 - accuracy: 0.9472
Epoch 543/1000
125/125 - 0s - loss: 0.1364 - accuracy: 0.9447
Epoch 544/1000
125/125 - 0s - loss: 0.1372 - accuracy: 0.9454
Epoch 545/1000
125/125 - 0s - loss: 0.1305 - accuracy: 0.9457
Epoch 546/1000
125/125 - 0s - loss: 0.1385 - accuracy: 0.9414
Epoch 547/1000
125/125 - 0s - loss: 0.1440 - accuracy: 0.9397
Epoch 548/1000
125/125 

Epoch 665/1000
125/125 - 0s - loss: 0.1105 - accuracy: 0.9537
Epoch 666/1000
125/125 - 0s - loss: 0.1152 - accuracy: 0.9522
Epoch 667/1000
125/125 - 0s - loss: 0.1298 - accuracy: 0.9454
Epoch 668/1000
125/125 - 0s - loss: 0.1189 - accuracy: 0.9497
Epoch 669/1000
125/125 - 0s - loss: 0.1084 - accuracy: 0.9563
Epoch 670/1000
125/125 - 0s - loss: 0.1123 - accuracy: 0.9563
Epoch 671/1000
125/125 - 0s - loss: 0.1120 - accuracy: 0.9532
Epoch 672/1000
125/125 - 0s - loss: 0.1067 - accuracy: 0.9570
Epoch 673/1000
125/125 - 0s - loss: 0.1081 - accuracy: 0.9500
Epoch 674/1000
125/125 - 0s - loss: 0.1241 - accuracy: 0.9429
Epoch 675/1000
125/125 - 0s - loss: 0.1119 - accuracy: 0.9515
Epoch 676/1000
125/125 - 0s - loss: 0.1142 - accuracy: 0.9517
Epoch 677/1000
125/125 - 0s - loss: 0.1090 - accuracy: 0.9542
Epoch 678/1000
125/125 - 0s - loss: 0.1214 - accuracy: 0.9475
Epoch 679/1000
125/125 - 0s - loss: 0.1161 - accuracy: 0.9512
Epoch 680/1000
125/125 - 0s - loss: 0.1173 - accuracy: 0.9545
Epoch 68

125/125 - 0s - loss: 0.0969 - accuracy: 0.9613
Epoch 798/1000
125/125 - 0s - loss: 0.0975 - accuracy: 0.9620
Epoch 799/1000
125/125 - 0s - loss: 0.0964 - accuracy: 0.9573
Epoch 800/1000
125/125 - 0s - loss: 0.1091 - accuracy: 0.9535
Epoch 801/1000
125/125 - 0s - loss: 0.0916 - accuracy: 0.9628
Epoch 802/1000
125/125 - 0s - loss: 0.0982 - accuracy: 0.9580
Epoch 803/1000
125/125 - 0s - loss: 0.0899 - accuracy: 0.9641
Epoch 804/1000
125/125 - 0s - loss: 0.1055 - accuracy: 0.9565
Epoch 805/1000
125/125 - 0s - loss: 0.0925 - accuracy: 0.9630
Epoch 806/1000
125/125 - 0s - loss: 0.1058 - accuracy: 0.9527
Epoch 807/1000
125/125 - 0s - loss: 0.0944 - accuracy: 0.9605
Epoch 808/1000
125/125 - 0s - loss: 0.0973 - accuracy: 0.9598
Epoch 809/1000
125/125 - 0s - loss: 0.0994 - accuracy: 0.9583
Epoch 810/1000
125/125 - 0s - loss: 0.0952 - accuracy: 0.9615
Epoch 811/1000
125/125 - 0s - loss: 0.1049 - accuracy: 0.9555
Epoch 812/1000
125/125 - 0s - loss: 0.0955 - accuracy: 0.9608
Epoch 813/1000
125/125 

Epoch 930/1000
125/125 - 0s - loss: 0.0805 - accuracy: 0.9666
Epoch 931/1000
125/125 - 0s - loss: 0.0772 - accuracy: 0.9693
Epoch 932/1000
125/125 - 0s - loss: 0.0745 - accuracy: 0.9698
Epoch 933/1000
125/125 - 0s - loss: 0.0795 - accuracy: 0.9678
Epoch 934/1000
125/125 - 0s - loss: 0.0906 - accuracy: 0.9625
Epoch 935/1000
125/125 - 0s - loss: 0.0885 - accuracy: 0.9643
Epoch 936/1000
125/125 - 0s - loss: 0.0951 - accuracy: 0.9610
Epoch 937/1000
125/125 - 0s - loss: 0.0807 - accuracy: 0.9646
Epoch 938/1000
125/125 - 0s - loss: 0.0763 - accuracy: 0.9706
Epoch 939/1000
125/125 - 0s - loss: 0.0915 - accuracy: 0.9585
Epoch 940/1000
125/125 - 0s - loss: 0.0838 - accuracy: 0.9661
Epoch 941/1000
125/125 - 0s - loss: 0.0791 - accuracy: 0.9701
Epoch 942/1000
125/125 - 0s - loss: 0.0756 - accuracy: 0.9716
Epoch 943/1000
125/125 - 0s - loss: 0.0874 - accuracy: 0.9658
Epoch 944/1000
125/125 - 0s - loss: 0.0729 - accuracy: 0.9701
Epoch 945/1000
125/125 - 0s - loss: 0.0812 - accuracy: 0.9628
Epoch 94

<tensorflow.python.keras.callbacks.History at 0x19b99535cc8>

# Save the Model

In [24]:
model.save("DL.h5")

# Evaluate the Model

In [25]:
# Load the model
from tensorflow.keras.models import load_model
model = load_model("DL.h5")

In [26]:
# Evaluate the model using the training data
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

42/42 - 0s - loss: 0.4320 - accuracy: 0.9042
Loss: 0.43202123045921326, Accuracy: 0.9042232036590576


In [27]:
X_train[0]

array([5.13957503e-03, 1.49634010e-02, 1.22097389e-04, 6.13938316e-06,
       2.01715266e-02, 4.75905371e-07, 2.65137706e-04, 8.42840048e-01])

In [28]:
X_test_scaled[0]

array([9.56292284e-03, 2.49060527e-02, 8.36256826e-05, 5.78998737e-06,
       8.86449400e-02, 7.91217799e-05, 1.77863211e-03, 8.53011568e-01])

In [29]:
# Grab just one data point to test with
test = np.expand_dims(X_test_scaled[0], axis=0)
test.shape

(1, 8)

In [30]:
# Make a prediction. The result should be 5 - STANDING
print(f"Predicted class: {model.predict_classes(test)}")

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Predicted class: [3]
