In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [5]:
# # Determine which columns to use for the model.  Removed the Error ratings
list(df.columns)


['koi_disposition',
 'koi_fpflag_nt',
 'koi_fpflag_ss',
 'koi_fpflag_co',
 'koi_fpflag_ec',
 'koi_period',
 'koi_period_err1',
 'koi_period_err2',
 'koi_time0bk',
 'koi_time0bk_err1',
 'koi_time0bk_err2',
 'koi_impact',
 'koi_impact_err1',
 'koi_impact_err2',
 'koi_duration',
 'koi_duration_err1',
 'koi_duration_err2',
 'koi_depth',
 'koi_depth_err1',
 'koi_depth_err2',
 'koi_prad',
 'koi_prad_err1',
 'koi_prad_err2',
 'koi_teq',
 'koi_insol',
 'koi_insol_err1',
 'koi_insol_err2',
 'koi_model_snr',
 'koi_tce_plnt_num',
 'koi_steff',
 'koi_steff_err1',
 'koi_steff_err2',
 'koi_slogg',
 'koi_slogg_err1',
 'koi_slogg_err2',
 'koi_srad',
 'koi_srad_err1',
 'koi_srad_err2',
 'ra',
 'dec',
 'koi_kepmag']

# Select your features (columns)

In [6]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [7]:
# Set features. This will also be used as your x values.
selected_features = df[[
#  'koi_fpflag_nt',
#  'koi_fpflag_ss',
#  'koi_fpflag_co',
#  'koi_fpflag_ec',
 'koi_disposition',
 'koi_period',
 'koi_time0bk',
 'koi_impact',
 'koi_duration',
 'koi_depth',
 'koi_prad',
 'koi_teq',
 'koi_insol',
 'koi_model_snr',
 'koi_tce_plnt_num',
 'koi_steff',
 'koi_slogg',
 'koi_srad',
 'ra',
 'dec',
 'koi_kepmag']]

selected_features

Unnamed: 0,koi_disposition,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_tce_plnt_num,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,CONFIRMED,54.418383,162.513840,0.586,4.50700,874.8,2.83,443,9.11,25.8,2,5455,4.467,0.927,291.93423,48.141651,15.347
1,FALSE POSITIVE,19.899140,175.850252,0.969,1.78220,10829.0,14.60,638,39.30,76.3,1,5853,4.544,0.868,297.00482,48.134129,15.436
2,FALSE POSITIVE,1.736952,170.307565,1.276,2.40641,8079.2,33.46,1395,891.96,505.6,1,5805,4.564,0.791,285.53461,48.285210,15.597
3,CONFIRMED,2.525592,171.595550,0.701,1.65450,603.3,2.75,1406,926.16,40.9,1,6031,4.438,1.046,288.75488,48.226200,15.509
4,CONFIRMED,4.134435,172.979370,0.762,3.14020,686.0,2.77,1160,427.65,40.2,2,6046,4.486,0.972,296.28613,48.224670,15.714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6986,FALSE POSITIVE,8.589871,132.016100,0.765,4.80600,87.7,1.11,929,176.40,8.4,1,5638,4.296,1.088,298.74921,46.973351,14.478
6987,FALSE POSITIVE,0.527699,131.705093,1.252,3.22210,1579.2,29.35,2088,4500.53,453.3,1,5638,4.529,0.903,297.18875,47.093819,14.082
6988,CANDIDATE,1.739849,133.001270,0.043,3.11400,48.5,0.72,1608,1585.81,10.6,1,6119,4.444,1.031,286.50937,47.163219,14.757
6989,FALSE POSITIVE,0.681402,132.181750,0.147,0.86500,103.6,1.07,2218,5713.41,12.3,1,6173,4.447,1.041,294.16489,47.176281,15.385


In [8]:
target = selected_features['koi_disposition']
data = selected_features.drop('koi_disposition', axis=1)

X=data
# y = pd.get_dummies(target)
y=target



# Create a Train Test Split

Use `koi_disposition` for the y values

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

X_train

Unnamed: 0,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_tce_plnt_num,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
6122,6.768901,133.077240,0.150,3.61600,123.1,1.24,1017,253.30,10.8,1,5737,4.327,1.125,294.40472,39.351681,14.725
6370,0.733726,132.020050,0.291,2.30900,114.6,0.86,1867,2891.64,13.8,1,5855,4.578,0.797,284.50391,42.463860,15.770
2879,7.652707,134.460380,0.970,79.89690,641.1,3.21,989,226.81,254.3,1,6328,4.481,0.963,295.50211,38.983540,13.099
107,7.953547,174.662240,0.300,2.63120,875.4,2.25,696,55.37,38.4,1,4768,4.536,0.779,291.15878,40.750271,15.660
29,4.959319,172.258529,0.831,2.22739,9802.0,12.21,1103,349.40,696.5,1,5712,4.359,1.082,292.16705,48.727589,15.263
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3772,8.268081,135.056330,1.099,3.47103,71479.0,50.31,863,130.83,1320.5,1,5749,4.502,0.922,292.53125,46.728699,15.768
5191,11.161938,133.553800,0.739,5.19500,124.4,1.97,1093,337.23,13.4,3,6200,4.072,1.640,295.21268,49.562180,13.374
5226,6.150251,134.422825,1.270,1.68923,2128.7,100.03,2251,6066.49,471.0,1,8914,3.896,2.867,297.18176,45.988441,10.622
5390,3.343285,134.845100,1.210,27.29000,166.8,64.00,2094,4535.97,79.5,2,6541,3.773,2.652,296.86258,41.147419,13.276


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [10]:
# Scale your data

from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)
# y_scaler = MinMaxScaler().fit(y_train)


In [11]:
# scaler based on training is applied to the test data set

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
# y_train_scaled = y_scaler.transform(y_train)
# y_test_scaled = y_scaler.transform(y_test)


# Model 1: Random Forest 



In [12]:
#Using Random Forest Classifier Model
from sklearn.ensemble import RandomForestClassifier

model_rf = RandomForestClassifier(n_estimators=200)
# model_rf.fit(X_train_scaled, y_train_scaled)
# model_rf.score(X_train_scaled, y_train_scaled)

model_rf.fit(X_train_scaled, y_train)
model_rf.score(X_train_scaled, y_train)

1.0

In [13]:
# Calculate feature importance & sort
importances = model_rf.feature_importances_
# importances
sorted(zip(model_rf.feature_importances_, selected_features), reverse=True)

[(0.14330184823781542, 'koi_insol'),
 (0.12154052487861203, 'koi_depth'),
 (0.08273496058286382, 'koi_disposition'),
 (0.08031381676133836, 'koi_time0bk'),
 (0.07827786715673593, 'koi_duration'),
 (0.06879897412278284, 'koi_impact'),
 (0.06525779122182306, 'koi_prad'),
 (0.05401579535716834, 'koi_period'),
 (0.05263932451343239, 'koi_teq'),
 (0.047431538971112824, 'koi_srad'),
 (0.04170402002415659, 'koi_tce_plnt_num'),
 (0.038769825658227515, 'ra'),
 (0.03779262556415685, 'dec'),
 (0.03672762280666021, 'koi_steff'),
 (0.03634602828933427, 'koi_slogg'),
 (0.01434743585377959, 'koi_model_snr')]

In [14]:
#Create the predicted model
y_predicted = model_rf.predict(X_test_scaled)
y_predicted



array(['FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', ...,
       'CONFIRMED', 'CONFIRMED', 'FALSE POSITIVE'], dtype=object)

In [15]:
# Calculate classification report for model predictions

from sklearn.metrics import classification_report
print(classification_report(y_test,y_predicted))

                precision    recall  f1-score   support

     CANDIDATE       0.59      0.51      0.55       411
     CONFIRMED       0.78      0.81      0.79       484
FALSE POSITIVE       0.80      0.84      0.82       853

      accuracy                           0.75      1748
     macro avg       0.72      0.72      0.72      1748
  weighted avg       0.75      0.75      0.75      1748



In [39]:
# Look at parameters used by our current forest
from pprint import pprint
print('Parameters currently in use:\n')
pprint(model_rf.get_params())

Parameters currently in use:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}


In [41]:
# Tuning the Random Forest Model

import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]

# Number of features to consider at every split
max_features = ['auto', 'sqrt']

# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)

# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]

# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}


In [55]:
from sklearn.ensemble import RandomForestRegressor

# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestRegressor()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
model_rf_random = RandomizedSearchCV(estimator = model_rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)

y_binary_train = pd.get_dummies(y_train)

# # Fit the random search model
model_rf_random.fit(X_train_scaled, y_binary_train)

model_rf_random.best_params_

Fitting 3 folds for each of 100 candidates, totalling 300 fits


{'n_estimators': 800,
 'min_samples_split': 5,
 'min_samples_leaf': 4,
 'max_features': 'sqrt',
 'max_depth': 30,
 'bootstrap': False}

In [None]:
# Fitting 3 folds for each of 100 candidates, totalling 300 fits  (Best Parms Output)

# {'n_estimators': 800,
#  'min_samples_split': 5,
#  'min_samples_leaf': 4,
#  'max_features': 'sqrt',
#  'max_depth': 30,
#  'bootstrap': False}

# Model 2: Neural Network Model 


In [18]:
# Once Hot Encoding of Y

from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
label_encoder_y_train = LabelEncoder()
label_encoder_y_test = LabelEncoder()
label_encoder_y_train.fit(y_train)
label_encoder_y_test.fit(y_test)

encoded_y_train = label_encoder_y_train.transform(y_train)
encoded_y_test = label_encoder_y_test.transform(y_test)
encoded_y_test


array([2, 0, 2, ..., 1, 1, 1])

In [19]:
from keras.utils import to_categorical

# Step 2: One-hot encoding

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# y_test_categorical


Using TensorFlow backend.


In [20]:
# Defining Model Architecture
from tensorflow.keras.models import Sequential
model_nn = Sequential()

from tensorflow.keras.layers import Dense
number_inputs = 16
number_hidden_nodes = 48
model_nn.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))



In [21]:
# Defining the Output Variables - Candidate, Confirmed, False Positive
number_classes = 3
model_nn.add(Dense(units=number_classes, activation='softmax'))

In [22]:
# Create a Model Summary

model_nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 48)                816       
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 147       
Total params: 963
Trainable params: 963
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Compiling the model

model_nn.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [24]:
# Fit (train) the model
model_nn.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Train on 5243 samples
Epoch 1/1000
5243/5243 - 0s - loss: 1.0200 - accuracy: 0.5052
Epoch 2/1000
5243/5243 - 0s - loss: 0.9586 - accuracy: 0.5184
Epoch 3/1000
5243/5243 - 0s - loss: 0.9075 - accuracy: 0.5655
Epoch 4/1000
5243/5243 - 0s - loss: 0.8767 - accuracy: 0.5834
Epoch 5/1000
5243/5243 - 0s - loss: 0.8583 - accuracy: 0.5918
Epoch 6/1000
5243/5243 - 0s - loss: 0.8466 - accuracy: 0.5911
Epoch 7/1000
5243/5243 - 0s - loss: 0.8354 - accuracy: 0.6031
Epoch 8/1000
5243/5243 - 0s - loss: 0.8291 - accuracy: 0.6006
Epoch 9/1000
5243/5243 - 0s - loss: 0.8224 - accuracy: 0.6079
Epoch 10/1000
5243/5243 - 0s - loss: 0.8158 - accuracy: 0.6107
Epoch 11/1000
5243/5243 - 0s - loss: 0.8108 - accuracy: 0.6163
Epoch 12/1000
5243/5243 - 0s - loss: 0.8065 - accuracy: 0.6164
Epoch 13/1000
5243/5243 - 0s - loss: 0.8018 - accuracy: 0.6195
Epoch 14/1000
5243/5243 - 0s - loss: 0.7968 - accuracy: 0.6271
Epoch 15/1000
5243/5243 - 0s - loss: 0.7943 - accuracy: 0.6250
Epoch 16/1000
5243/5243 - 0s - loss: 0.790

Epoch 131/1000
5243/5243 - 0s - loss: 0.6544 - accuracy: 0.7171
Epoch 132/1000
5243/5243 - 0s - loss: 0.6537 - accuracy: 0.7158
Epoch 133/1000
5243/5243 - 0s - loss: 0.6527 - accuracy: 0.7135
Epoch 134/1000
5243/5243 - 0s - loss: 0.6526 - accuracy: 0.7187
Epoch 135/1000
5243/5243 - 0s - loss: 0.6506 - accuracy: 0.7191
Epoch 136/1000
5243/5243 - 0s - loss: 0.6508 - accuracy: 0.7171
Epoch 137/1000
5243/5243 - 0s - loss: 0.6503 - accuracy: 0.7194
Epoch 138/1000
5243/5243 - 0s - loss: 0.6491 - accuracy: 0.7187
Epoch 139/1000
5243/5243 - 0s - loss: 0.6487 - accuracy: 0.7200
Epoch 140/1000
5243/5243 - 0s - loss: 0.6476 - accuracy: 0.7137
Epoch 141/1000
5243/5243 - 0s - loss: 0.6470 - accuracy: 0.7208
Epoch 142/1000
5243/5243 - 0s - loss: 0.6469 - accuracy: 0.7196
Epoch 143/1000
5243/5243 - 0s - loss: 0.6450 - accuracy: 0.7234
Epoch 144/1000
5243/5243 - 0s - loss: 0.6455 - accuracy: 0.7236
Epoch 145/1000
5243/5243 - 0s - loss: 0.6458 - accuracy: 0.7210
Epoch 146/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.5918 - accuracy: 0.7517
Epoch 260/1000
5243/5243 - 0s - loss: 0.5935 - accuracy: 0.7498
Epoch 261/1000
5243/5243 - 0s - loss: 0.5916 - accuracy: 0.7526
Epoch 262/1000
5243/5243 - 0s - loss: 0.5915 - accuracy: 0.7517
Epoch 263/1000
5243/5243 - 0s - loss: 0.5920 - accuracy: 0.7532
Epoch 264/1000
5243/5243 - 0s - loss: 0.5907 - accuracy: 0.7509
Epoch 265/1000
5243/5243 - 0s - loss: 0.5924 - accuracy: 0.7538
Epoch 266/1000
5243/5243 - 0s - loss: 0.5912 - accuracy: 0.7524
Epoch 267/1000
5243/5243 - 0s - loss: 0.5907 - accuracy: 0.7547
Epoch 268/1000
5243/5243 - 0s - loss: 0.5906 - accuracy: 0.7553
Epoch 269/1000
5243/5243 - 0s - loss: 0.5904 - accuracy: 0.7515
Epoch 270/1000
5243/5243 - 0s - loss: 0.5912 - accuracy: 0.7505
Epoch 271/1000
5243/5243 - 0s - loss: 0.5889 - accuracy: 0.7572
Epoch 272/1000
5243/5243 - 0s - loss: 0.5888 - accuracy: 0.7574
Epoch 273/1000
5243/5243 - 0s - loss: 0.5901 - accuracy: 0.7513
Epoch 274/1000
5243/5243 - 0s - loss: 0.5876 - accuracy

Epoch 388/1000
5243/5243 - 0s - loss: 0.5579 - accuracy: 0.7706
Epoch 389/1000
5243/5243 - 0s - loss: 0.5618 - accuracy: 0.7643
Epoch 390/1000
5243/5243 - 0s - loss: 0.5596 - accuracy: 0.7671
Epoch 391/1000
5243/5243 - 0s - loss: 0.5596 - accuracy: 0.7635
Epoch 392/1000
5243/5243 - 0s - loss: 0.5591 - accuracy: 0.7643
Epoch 393/1000
5243/5243 - 0s - loss: 0.5606 - accuracy: 0.7665
Epoch 394/1000
5243/5243 - 0s - loss: 0.5594 - accuracy: 0.7673
Epoch 395/1000
5243/5243 - 0s - loss: 0.5584 - accuracy: 0.7696
Epoch 396/1000
5243/5243 - 0s - loss: 0.5602 - accuracy: 0.7664
Epoch 397/1000
5243/5243 - 0s - loss: 0.5580 - accuracy: 0.7688
Epoch 398/1000
5243/5243 - 0s - loss: 0.5593 - accuracy: 0.7704
Epoch 399/1000
5243/5243 - 0s - loss: 0.5584 - accuracy: 0.7690
Epoch 400/1000
5243/5243 - 0s - loss: 0.5577 - accuracy: 0.7713
Epoch 401/1000
5243/5243 - 0s - loss: 0.5587 - accuracy: 0.7664
Epoch 402/1000
5243/5243 - 0s - loss: 0.5573 - accuracy: 0.7654
Epoch 403/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.5440 - accuracy: 0.7726
Epoch 517/1000
5243/5243 - 0s - loss: 0.5441 - accuracy: 0.7753
Epoch 518/1000
5243/5243 - 0s - loss: 0.5459 - accuracy: 0.7709
Epoch 519/1000
5243/5243 - 0s - loss: 0.5435 - accuracy: 0.7738
Epoch 520/1000
5243/5243 - 0s - loss: 0.5431 - accuracy: 0.7757
Epoch 521/1000
5243/5243 - 0s - loss: 0.5436 - accuracy: 0.7732
Epoch 522/1000
5243/5243 - 0s - loss: 0.5441 - accuracy: 0.7757
Epoch 523/1000
5243/5243 - 0s - loss: 0.5425 - accuracy: 0.7749
Epoch 524/1000
5243/5243 - 0s - loss: 0.5422 - accuracy: 0.7740
Epoch 525/1000
5243/5243 - 0s - loss: 0.5414 - accuracy: 0.7770
Epoch 526/1000
5243/5243 - 0s - loss: 0.5456 - accuracy: 0.7728
Epoch 527/1000
5243/5243 - 0s - loss: 0.5422 - accuracy: 0.7747
Epoch 528/1000
5243/5243 - 0s - loss: 0.5438 - accuracy: 0.7763
Epoch 529/1000
5243/5243 - 0s - loss: 0.5442 - accuracy: 0.7751
Epoch 530/1000
5243/5243 - 0s - loss: 0.5419 - accuracy: 0.7765
Epoch 531/1000
5243/5243 - 0s - loss: 0.5427 - accuracy

Epoch 645/1000
5243/5243 - 0s - loss: 0.5358 - accuracy: 0.7786
Epoch 646/1000
5243/5243 - 0s - loss: 0.5336 - accuracy: 0.7789
Epoch 647/1000
5243/5243 - 0s - loss: 0.5345 - accuracy: 0.7788
Epoch 648/1000
5243/5243 - 0s - loss: 0.5327 - accuracy: 0.7772
Epoch 649/1000
5243/5243 - 0s - loss: 0.5341 - accuracy: 0.7753
Epoch 650/1000
5243/5243 - 0s - loss: 0.5358 - accuracy: 0.7726
Epoch 651/1000
5243/5243 - 0s - loss: 0.5317 - accuracy: 0.7784
Epoch 652/1000
5243/5243 - 0s - loss: 0.5367 - accuracy: 0.7767
Epoch 653/1000
5243/5243 - 0s - loss: 0.5350 - accuracy: 0.7772
Epoch 654/1000
5243/5243 - 0s - loss: 0.5334 - accuracy: 0.7784
Epoch 655/1000
5243/5243 - 0s - loss: 0.5335 - accuracy: 0.7788
Epoch 656/1000
5243/5243 - 0s - loss: 0.5316 - accuracy: 0.7784
Epoch 657/1000
5243/5243 - 0s - loss: 0.5320 - accuracy: 0.7801
Epoch 658/1000
5243/5243 - 0s - loss: 0.5333 - accuracy: 0.7780
Epoch 659/1000
5243/5243 - 0s - loss: 0.5368 - accuracy: 0.7795
Epoch 660/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.5267 - accuracy: 0.7807
Epoch 774/1000
5243/5243 - 0s - loss: 0.5265 - accuracy: 0.7803
Epoch 775/1000
5243/5243 - 0s - loss: 0.5286 - accuracy: 0.7747
Epoch 776/1000
5243/5243 - 0s - loss: 0.5282 - accuracy: 0.7820
Epoch 777/1000
5243/5243 - 0s - loss: 0.5285 - accuracy: 0.7833
Epoch 778/1000
5243/5243 - 0s - loss: 0.5278 - accuracy: 0.7809
Epoch 779/1000
5243/5243 - 0s - loss: 0.5279 - accuracy: 0.7807
Epoch 780/1000
5243/5243 - 0s - loss: 0.5260 - accuracy: 0.7841
Epoch 781/1000
5243/5243 - 0s - loss: 0.5277 - accuracy: 0.7807
Epoch 782/1000
5243/5243 - 0s - loss: 0.5282 - accuracy: 0.7786
Epoch 783/1000
5243/5243 - 0s - loss: 0.5265 - accuracy: 0.7824
Epoch 784/1000
5243/5243 - 0s - loss: 0.5252 - accuracy: 0.7816
Epoch 785/1000
5243/5243 - 0s - loss: 0.5247 - accuracy: 0.7816
Epoch 786/1000
5243/5243 - 0s - loss: 0.5252 - accuracy: 0.7812
Epoch 787/1000
5243/5243 - 0s - loss: 0.5292 - accuracy: 0.7803
Epoch 788/1000
5243/5243 - 0s - loss: 0.5277 - accuracy

Epoch 902/1000
5243/5243 - 0s - loss: 0.5218 - accuracy: 0.7850
Epoch 903/1000
5243/5243 - 0s - loss: 0.5240 - accuracy: 0.7818
Epoch 904/1000
5243/5243 - 0s - loss: 0.5257 - accuracy: 0.7829
Epoch 905/1000
5243/5243 - 0s - loss: 0.5205 - accuracy: 0.7829
Epoch 906/1000
5243/5243 - 0s - loss: 0.5191 - accuracy: 0.7845
Epoch 907/1000
5243/5243 - 0s - loss: 0.5207 - accuracy: 0.7843
Epoch 908/1000
5243/5243 - 0s - loss: 0.5204 - accuracy: 0.7841
Epoch 909/1000
5243/5243 - 0s - loss: 0.5197 - accuracy: 0.7849
Epoch 910/1000
5243/5243 - 0s - loss: 0.5194 - accuracy: 0.7837
Epoch 911/1000
5243/5243 - 0s - loss: 0.5198 - accuracy: 0.7824
Epoch 912/1000
5243/5243 - 0s - loss: 0.5194 - accuracy: 0.7845
Epoch 913/1000
5243/5243 - 0s - loss: 0.5208 - accuracy: 0.7856
Epoch 914/1000
5243/5243 - 0s - loss: 0.5183 - accuracy: 0.7856
Epoch 915/1000
5243/5243 - 0s - loss: 0.5189 - accuracy: 0.7833
Epoch 916/1000
5243/5243 - 0s - loss: 0.5196 - accuracy: 0.7799
Epoch 917/1000
5243/5243 - 0s - loss: 0.

<tensorflow.python.keras.callbacks.History at 0x1530dc99b00>

In [25]:
# Evaluate the model using the testing data
model_loss, model_accuracy = model_nn.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1748 - 0s - loss: 0.6009 - accuracy: 0.7489
Loss: 0.6009191288282452, Accuracy: 0.7488558292388916


In [None]:
# Tuning the NN Model

from keras.wrappers.scikit_learn import KerasClassifier



# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [None]:
#Tuning the Random Forest Model 



In [32]:
# Create the GridSearchCV model

# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
# grid_rf = GridSearchCV(model_rf, param_grid, verbose=3)
grid_nn = GridSearchCV(model_nn, param_grid, verbose=3)

grid_nn

GridSearchCV(estimator=<tensorflow.python.keras.engine.sequential.Sequential object at 0x000001530DB0C518>,
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005]},
             verbose=3)

In [33]:
# Train the model with GridSearch

# This will take the SVC model and try each combination of parameters
# grid_rf.fit(X_train, y_train)
grid_nn.fit(X_train, y_train)


TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator <tensorflow.python.keras.engine.sequential.Sequential object at 0x000001530DB0C518> does not.

In [29]:
# List the best parameters for this dataset
print(grid_rf.best_params_)
print(grid_nn.best_params_)



AttributeError: 'GridSearchCV' object has no attribute 'best_params_'

In [None]:
# Make predictions with the hypertuned model
prediction_rf = grid_rf.predict(X_test)
prediction_nn = grid_nn.predict(X_test)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,prediction_rf))
print(classification_report(y_test,prediction_nn))

# Save the Model

In [None]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'your_name.sav'
joblib.dump(your_model, filename)