In [20]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade


Requirement already up-to-date: sklearn in c:\users\lmuns\anaconda3\envs\pythondata\lib\site-packages (0.0)


In [21]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [22]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import tensorflow
from numpy.random import seed

# Read the CSV and Perform Basic Data Cleaning

In [23]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()


In [24]:
target = df["koi_disposition"]
target_names = ["CONFIRMED", "FALSE POSITIVE"]

candidates = df[df["koi_disposition"] == "CANDIDATE"]
df = df[df["koi_disposition"] != "CANDIDATE"]

false = df[df["koi_disposition"] == "FALSE POSITIVE"]
true = df[df["koi_disposition"] == "CONFIRMED"]
print(len(false))
print(len(true))

3504
1800


# Select your features (columns)

In [25]:


data = df.drop("koi_disposition", axis=1)
#using the top X's from the random forest. 
feature_names = df[['koi_fpflag_co','koi_fpflag_nt', 'koi_fpflag_ss', 'koi_model_snr', 'koi_prad', 'koi_duration_err2', 'koi_duration_err1', 'koi_fpflag_ec', 'koi_prad_err2', 'koi_steff_err2', 'koi_prad_err1', 'koi_steff_err1', 'koi_time0bk_err1', 'koi_time0bk_err2', 'koi_period', 'koi_depth', 'koi_duration']]
data.head()
X = data
y = target

encoded_y = pd.get_dummies(df, columns=["koi_disposition"])
encoded_y = encoded_y["koi_disposition_CONFIRMED"]
encoded_y
y = encoded_y
print(X.shape, y.shape)


(5304, 40) (5304,)


# Create a Train Test Split

Use `koi_disposition` for the y values

In [26]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [27]:
X_train.head()



Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
5854,0,1,0,0,4.176929,3e-06,-3e-06,133.197382,0.000672,-0.000672,...,-162,4.628,0.035,-0.105,0.708,0.119,-0.051,291.05762,38.64534,15.22
3218,1,0,0,0,68.538296,0.001111,-0.001111,186.21716,0.00721,-0.00721,...,-56,2.613,0.028,-0.039,8.123,1.474,-0.295,288.03705,39.208988,13.983
2701,0,0,0,0,16.258028,0.000288,-0.000288,139.6381,0.0122,-0.0122,...,-128,4.235,0.143,-0.117,1.356,0.212,-0.259,287.62082,43.141647,13.178
1577,0,0,1,0,1.303577,2e-06,-2e-06,133.0758,0.00166,-0.00166,...,-198,4.494,0.052,-0.208,0.955,0.285,-0.102,293.62442,38.732479,14.834
627,0,0,0,0,15.573637,0.000101,-0.000101,132.44282,0.00436,-0.00436,...,-214,4.376,0.105,-0.195,1.083,0.35,-0.15,296.35867,39.115189,14.82


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [28]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler

X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled

array([[0.        , 1.        , 0.        , ..., 0.5125824 , 0.13219742,
        0.73271194],
       [1.        , 0.        , 0.        , ..., 0.37436113, 0.16822948,
        0.6229028 ],
       [0.        , 0.        , 0.        , ..., 0.35531445, 0.41963068,
        0.55144252],
       ...,
       [0.        , 1.        , 0.        , ..., 0.78773998, 0.82241916,
        0.53865957],
       [0.        , 0.        , 1.        , ..., 0.83648399, 0.30395021,
        0.58490901],
       [1.        , 0.        , 0.        , ..., 0.33148768, 0.5388321 ,
        0.72667554]])

In [29]:
from tensorflow.keras.utils import to_categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical


array([[1., 0.],
       [1., 0.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

# Train the Model



In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()

model.add(Dense(units=100, activation='relu', input_dim=40))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

model.summary()



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               4100      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 14,402
Trainable params: 14,402
Non-trainable params: 0
_________________________________________________________________


In [31]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [32]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=0
)

<tensorflow.python.keras.callbacks.History at 0x1b32e1b07f0>

In [33]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1326/1326 - 0s - loss: 0.0266 - accuracy: 0.9925
Loss: 0.026605581170787998, Accuracy: 0.9924585223197937


# Save the Model

In [34]:
model.save("models/deep_leaning.h5")

candidates = candidates.drop("koi_disposition", axis=1)
candidates.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
29,0,0,0,0,4.959319,5.15e-07,-5.15e-07,172.258529,8.3e-05,-8.3e-05,...,-77,4.359,0.11,-0.11,1.082,0.173,-0.13,292.16705,48.727589,15.263
47,0,0,0,0,40.419504,0.0001139,-0.0001139,173.56469,0.00222,-0.00222,...,-163,4.507,0.116,-0.105,0.781,0.116,-0.095,294.31686,50.080231,15.487
50,0,0,0,0,7.240661,1.62e-05,-1.62e-05,137.75545,0.002,-0.002,...,-79,4.595,0.012,-0.064,0.765,0.055,-0.028,293.83331,50.23035,15.334
51,0,0,0,0,3.435916,4.73e-05,-4.73e-05,132.6624,0.011,-0.011,...,-115,4.339,0.132,-0.108,1.087,0.157,-0.142,287.88733,46.276241,12.791
59,0,0,0,0,1.62663,1.02e-06,-1.02e-06,169.820171,0.000487,-0.000487,...,-172,4.41,0.124,-0.186,0.973,0.27,-0.145,294.36819,38.31028,15.279


In [38]:

scaler = MinMaxScaler().fit(candidates)
candidates_scaled = scaler.transform(candidates)

predictions = model.predict(candidates_scaled)
print(predictions)

[[3.6498843e-04 9.9963498e-01]
 [8.2779860e-01 1.7220140e-01]
 [1.1152900e-04 9.9988842e-01]
 ...
 [1.0000000e+00 4.0114001e-09]
 [9.9950039e-01 4.9954141e-04]
 [1.5988424e-05 9.9998403e-01]]


[3.6498843e-04 9.9963498e-01]
[0.8277986 0.1722014]
[1.115290e-04 9.998884e-01]
[4.2514343e-06 9.9999571e-01]
[2.7667914e-04 9.9972326e-01]
[0.00201118 0.9979888 ]
[6.769309e-06 9.999932e-01]
[4.4663384e-06 9.9999559e-01]
[1.191830e-06 9.999988e-01]
[0.9891622  0.01083781]
[0.57590127 0.4240988 ]
[2.5974994e-04 9.9974018e-01]
[3.1367685e-07 9.9999964e-01]
[9.999913e-01 8.692305e-06]
[0.02323608 0.97676396]
[9.7003227e-05 9.9990296e-01]
[3.2317926e-06 9.9999678e-01]
[1.2590796e-04 9.9987411e-01]
[0.00276548 0.9972345 ]
[0.9282464  0.07175361]
[1.2850268e-06 9.9999869e-01]
[0.00392554 0.99607444]
[3.4162631e-06 9.9999654e-01]
[1.431888e-05 9.999857e-01]
[4.0292358e-05 9.9995971e-01]
[1.0000000e+00 4.0326263e-13]
[0.00453154 0.9954685 ]
[9.9996376e-01 3.6246482e-05]
[0.8348329  0.16516712]
[0.9936784  0.00632166]
[0.00119258 0.99880743]
[9.0101876e-06 9.9999094e-01]
[0.97974074 0.02025927]
[3.5978635e-04 9.9964023e-01]
[1.0407149e-05 9.9998963e-01]
[2.2805759e-06 9.9999774e-01]
[1.6423814