### Separate the Features (X) from the Target (y)

In [None]:
# Features: every column except the disposition label.
X = keplerProcessed_df.drop(columns=["Exoplanet Archive Disposition"])
# Target: the disposition label on its own.
y = keplerProcessed_df["Exoplanet Archive Disposition"]

# Draw one histogram per feature column to eyeball the distributions.
X.hist(figsize=(14, 14))

plt.show()

### Split our data into training and testing sets

In [None]:
# Stratify on y so both partitions keep the same class proportions;
# the fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y
)

# Show the (rows, columns) of the training features.
X_train.shape

### Scale data

In [None]:
# Build the scaler and learn its statistics from the training split only,
# so no information from the test split leaks into the fit.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-derived scaling to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

# <span style="color:slateblue"><b>Supervised ML Logistic Regression Model</b></span>

In [None]:
# Instantiate the logistic-regression estimator (L-BFGS solver, capped at
# 200 iterations, seeded for repeatability). Training happens in a later cell.
classifier = LogisticRegression(solver="lbfgs", max_iter=200, random_state=1)

In [None]:
# Fit the classifier on the scaled training features and their labels.
classifier.fit(X=X_train_scaled, y=y_train)

In [None]:
# List the features sorted in descending order by feature importance.
# "Importance" here is the signed logistic-regression coefficient taken from
# the first row of classifier.coef_ and paired with the matching column name.
x = sorted(zip(classifier.coef_[0], X.columns), reverse=True)

# Build the frame straight from the (coefficient, name) tuples. Routing them
# through np.array would coerce the mixed float/str pairs to a single string
# dtype, leaving "Importance" as text and breaking numeric plotting/sorting.
featureImp = pd.DataFrame(x, columns=["Importance", "Feature"])
featureImp

In [None]:
# Bar chart of the importance table: one bar per feature.
plot_feature_Importance = px.bar(
    featureImp,
    x="Feature",
    y="Importance",
    labels={"Feature": "Feature", "Importance": "Importance"},
)

# Centre the title horizontally and pin it near the top of the figure.
plot_feature_Importance.update_layout(
    title=dict(
        text="Feature Importance",
        y=0.95,
        x=0.5,
        xanchor="center",
        yanchor="top",
    )
)

plot_feature_Importance.show()

In [None]:
# Predict a label for every row of the scaled test set.
y_pred = classifier.predict(X_test_scaled)

# Put predictions next to the true labels; the original row index is
# discarded so the table is numbered from 0.
results = (
    pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
    .reset_index(drop=True)
)
results

In [None]:
# Fraction of test rows whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

### Confusion Matrix

In [None]:
# Count test rows for each (actual, predicted) label pair.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Wrap the raw counts in a labelled frame: rows are actual classes,
# columns are predicted classes.
# NOTE(review): the labels assume exactly two classes encoded as 0 and 1 - confirm.
confusion_matrix_df = pd.DataFrame(
    data=cm,
    index=["Actual 0", "Actual 1"],
    columns=["Predicted 0", "Predicted 1"],
)
confusion_matrix_df

### Classification Report

In [None]:
# Text summary of the per-class metrics computed from the test predictions.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)