In [35]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(1)

In [1]:
# Pandas Import 
import pandas as pd

In [13]:
# Read in exoplanet data 
exoplanet_df = pd.read_csv("exoplanet_data.csv")
exoplanet_df.head()

Unnamed: 0,rowid,kepid,kepoi_name,kepler_name,koi_disposition,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,1,10797460,K00752.01,Kepler-227 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,2,10797460,K00752.02,Kepler-227 c,CONFIRMED,CANDIDATE,0.969,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,3,10811496,K00753.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,4,10848459,K00754.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
4,5,10854555,K00755.01,Kepler-664 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-211.0,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509


In [19]:
# Rename columns 
exoplanet_df = exoplanet_df.rename(columns={"kepid":"Kepler ID", \
                                            "kepoi_name":"Kepler Object of Interest Name",\
                                            "kepler_name":"Kepler Name (Confirmed Planet)",\
                                            "koi_disposition":"KOI Disposition",\
                                            "koi_score":"KOI Disposition Score (Level of confidence)",\
                                            "koi_fpflag_nt":"Not Transit-Like Flag",\
                                            "koi_fpflag_ss":"Stellar Eclipse Flag",\
                                            "koi_fpflag_co":"Centroid Offset Flag",\
                                            "koi_fpflag_ec":"Ephemeris Contamination Flag",\
                                            "koi_period":"Orbital Period",\
                                            "koi_period_err1":"Orbital Period Positive Uncertainties",\
                                            "koi_period_err2":"Orbital Period Negative Uncertainties",\
                                            "koi_time0bk":"Transit Epoch",\
                                            "koi_time0bk_err1":"Transit Epoch Positive Uncertainties",\
                                            "koi_time0bk_err2":"Transit Epoch Negative Uncertainties",\
                                            "koi_impact":"Impact Parameter",\
                                            "koi_impact_err1":"Impact Parameter Positive Uncertainties",\
                                            "koi_impact_err2":"Impact Parameter Negative Uncertainties",\
                                            "koi_depth":"Transit Depth",\
                                            "koi_depth_err1":"Transit Depth Positive Uncertainties",\
                                            "koi_depth_err2":"Transit Depth Negative Uncertainties",\
                                            "koi_prad":"Planetary Radius",\
                                            "koi_prad_err1":"Planetary Radius Positive Uncertainties",\
                                            "koi_prad_err2":"Planetary Radius Negative Uncertainties",\
                                            "koi_teq":"Equilibrium Teperature (Kelvin)",\
                                            "koi_teq_err1":"Equilibrium Teperature Positive Uncertainties",\
                                            "koi_teq_err2":"Equilibrium Teperature Negative Uncertainties",\
                                            "koi_steff":"Stellar Effective Temperature (Kelvin)",\
                                            "koi_steff_err1":"Stellar Effective Temperature Positive Uncertainties",\
                                            "koi_steff_err2":"Stellar Effective Temperature Negative Uncertainties",\
                                            "koi_slogg":"Stellar Surface Gravity (Base-10 Log)",\
                                            "koi_slogg_err1":"Stellar Surface Gravity Positive Uncertainties",\
                                            "koi_slogg_err2":"Stellar Surface Gravity Negative Uncertainties",\
                                            "koi_srad":"Stellar Radius",\
                                            "koi_srad_err1":"Stellar Radius Positive Uncertainties",\
                                            "koi_srad_err2":"Stellar Radious Negative Uncertainties",\
                                            "ra":"KIC Right Ascension",\
                                            "dec":"KIC Declination",\
                                            "koi_depth":"Transit Depth",\
                                            "koi_kepmag":"Kepler-band(mag)"\
                                            })

# exoplanet_df.columns
# remove unnecessary columns
exoplanet_df = exoplanet_df[['Kepler ID', 'Kepler Object of Interest Name',\
       'Kepler Name (Confirmed Planet)', 'KOI Disposition',\
       'KOI Disposition Score (Level of confidence)',\
       'Not Transit-Like Flag',\
       'Stellar Eclipse Flag',\
       'Centroid Offset Flag',\
       'Ephemeris Contamination Flag', 'Orbital Period',\
       'Orbital Period Positive Uncertainties',\
       'Orbital Period Negative Uncertainties', 'Transit Epoch',\
       'Transit Epoch Positive Uncertainties',\
       'Transit Epoch Negative Uncertainties', 'Impact Parameter',\
       'Impact Parameter Positive Uncertainties',\
       'Impact Parameter Negative Uncertainties',\
       'Transit Depth',\
       'Transit Depth Positive Uncertainties',\
       'Transit Depth Negative Uncertainties', 'Planetary Radius',\
       'Planetary Radius Positive Uncertainties',\
       'Planetary Radius Negative Uncertainties',\
       'Equilibrium Teperature (Kelvin)',\
       'Equilibrium Teperature Positive Uncertainties',\
       'Equilibrium Teperature Negative Uncertainties',\
       'Stellar Effective Temperature (Kelvin)',\
       'Stellar Effective Temperature Positive Uncertainties',\
       'Stellar Effective Temperature Negative Uncertainties',\
       'Stellar Surface Gravity (Base-10 Log)',\
       'Stellar Surface Gravity Positive Uncertainties',\
       'Stellar Surface Gravity Negative Uncertainties', 'Stellar Radius',\
       'Stellar Radius Positive Uncertainties',\
       'Stellar Radious Negative Uncertainties', 'KIC Right Ascension',\
       'KIC Declination', 'Kepler-band(mag)']]

exoplanet_df


Unnamed: 0,Kepler ID,Kepler Object of Interest Name,Kepler Name (Confirmed Planet),KOI Disposition,KOI Disposition Score (Level of confidence),Not Transit-Like Flag,Stellar Eclipse Flag,Centroid Offset Flag,Ephemeris Contamination Flag,Orbital Period,...,Stellar Effective Temperature Negative Uncertainties,Stellar Surface Gravity (Base-10 Log),Stellar Surface Gravity Positive Uncertainties,Stellar Surface Gravity Negative Uncertainties,Stellar Radius,Stellar Radius Positive Uncertainties,Stellar Radious Negative Uncertainties,KIC Right Ascension,KIC Declination,Kepler-band(mag)
0,10797460,K00752.01,Kepler-227 b,CONFIRMED,1.000,0,0,0,0,9.488036,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,10797460,K00752.02,Kepler-227 c,CONFIRMED,0.969,0,0,0,0,54.418383,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,10811496,K00753.01,,FALSE POSITIVE,0.000,0,1,0,0,19.899140,...,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,10848459,K00754.01,,FALSE POSITIVE,0.000,0,1,0,0,1.736952,...,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.285210,15.597
4,10854555,K00755.01,Kepler-664 b,CONFIRMED,1.000,0,0,0,0,2.525592,...,-211.0,4.438,0.070,-0.210,1.046,0.334,-0.133,288.75488,48.226200,15.509
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9559,10031643,K07984.01,,FALSE POSITIVE,0.000,0,0,0,1,8.589871,...,-152.0,4.296,0.231,-0.189,1.088,0.313,-0.228,298.74921,46.973351,14.478
9560,10090151,K07985.01,,FALSE POSITIVE,0.000,0,1,1,0,0.527699,...,-166.0,4.529,0.035,-0.196,0.903,0.237,-0.079,297.18875,47.093819,14.082
9561,10128825,K07986.01,,CANDIDATE,0.497,0,0,0,0,1.739849,...,-220.0,4.444,0.056,-0.224,1.031,0.341,-0.114,286.50937,47.163219,14.757
9562,10147276,K07987.01,,FALSE POSITIVE,0.021,0,0,1,0,0.681402,...,-236.0,4.447,0.056,-0.224,1.041,0.341,-0.114,294.16489,47.176281,15.385


In [41]:
exoplanet_df.columns

Index(['Kepler ID', 'Kepler Object of Interest Name',
       'Kepler Name (Confirmed Planet)', 'KOI Disposition',
       'KOI Disposition Score (Level of confidence)', 'Not Transit-Like Flag',
       'Stellar Eclipse Flag', 'Centroid Offset Flag',
       'Ephemeris Contamination Flag', 'Orbital Period',
       'Orbital Period Positive Uncertainties',
       'Orbital Period Negative Uncertainties', 'Transit Epoch',
       'Transit Epoch Positive Uncertainties',
       'Transit Epoch Negative Uncertainties', 'Impact Parameter',
       'Impact Parameter Positive Uncertainties',
       'Impact Parameter Negative Uncertainties', 'Transit Depth',
       'Transit Depth Positive Uncertainties',
       'Transit Depth Negative Uncertainties', 'Planetary Radius',
       'Planetary Radius Positive Uncertainties',
       'Planetary Radius Negative Uncertainties',
       'Equilibrium Teperature (Kelvin)',
       'Equilibrium Teperature Positive Uncertainties',
       'Equilibrium Teperature Negative

In [None]:
# Notes on columns: 
    # note from NASA data documentation: "Given the different input data and analysis methods used for different deliveries, users are advised to use the uncertainties to understand the significance of a given parameter value." Based on this note, I have included the uncertainties measurements. 
    # koi_pdisposition - didn't use because these subjects are still in process of being tested/examined/reviewed
    # Disposition score =  A value between 0 and 1 that indicates the confidence in the KOI disposition.
    # koi_duration - didn't use because this was simple the amount of time the exoplanet was observed, not an actual characteristic or measurement of the exoplanet
    # koi_insol (and error values) - didn't use because the error values were not actually present in the original NASA dataset, and the insolation flux is another way to gvie equilibrium temperature, which we already have 
    # koi_model_snr - didn't use bc this is an analysis tool used by the exoplanet project, not an actual characteristic or measurement of the exoplanet
    # koi_tce_plnt_num - didn't use bc is just an identifying number rather than a characteristic or measurment of the exoplanet
    # koi_tce_dlivname - didn't use bc is just an identifying name rather than a characteristic or measurment of the exoplanet

    

In [47]:
# Assign the appropriate columns to X and y values 
X = exoplanet_df.drop(['Kepler ID', 'Kepler Object of Interest Name','Kepler Name (Confirmed Planet)', 'KOI Disposition'], axis=1)
y = exoplanet_df["KOI Disposition"]

In [52]:
# # Apply One-hot encoding to y values 

# # Label-encode data set
# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# label_encoder.fit(y)
# encoded_y = label_encoder.transform(y)

# for label, original_class in zip(encoded_y, y):
#     print('Original Class: ' + str(original_class))
#     print('Encoded Label: ' + str(label))
#     print('-' * 12)

# # Finalize One-hot encoding
# from tensorflow.keras.utils import to_categorical

# one_hot_y = to_categorical(encoded_y)
# one_hot_y

In [53]:
# Confirm shape of X and y
print(X.shape, y.shape)

(9564, 35) (9564,)


In [54]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)

In [None]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=2))
model.add(Dense(units=2, activation='softmax'))