In [1]:
# Using Logistic Regression

import numpy as np
import pandas as pd  # For data handling
from sklearn.datasets import fetch_openml  # To load the Shuttle dataset from OpenML
from sklearn.linear_model import LogisticRegression  # The logistic regression model
from sklearn.model_selection import train_test_split  # To split data into training and testing sets
from sklearn.metrics import accuracy_score, classification_report  # For model evaluation
import warnings

# Silence only FutureWarning noise (library deprecation notices). A blanket
# filterwarnings('ignore') would also hide a ConvergenceWarning from the
# solver below, which is the only signal that max_iter needs to be raised.
warnings.filterwarnings('ignore', category=FutureWarning)

# Step 1: Data Preparation
# ------------------------------------
# Load the Shuttle dataset from OpenML, selected by name and version.
shuttle = fetch_openml(name='shuttle', version=1, as_frame=True)
df = shuttle.frame

# Collapse the multi-class 'class' target into a binary label:
#   - class 1 (assumed by this analysis to be the safe condition) -> 1
#   - every other class (treated as unsafe or abnormal)           -> 0
# Depending on the scikit-learn version / parser, fetch_openml can return the
# class labels as strings ('1') or as integers (1). Normalising to str first
# makes the comparison work in both cases; comparing directly against '1'
# would silently label every row 0 when the labels arrive as integers.
df['binary_class'] = (df['class'].astype(str) == '1').astype(int)

# Fail loudly if the binarisation produced a degenerate (single-class) target
# instead of training and evaluating a meaningless model.
if df['binary_class'].nunique() != 2:
    raise ValueError(
        "Binary target must contain both classes; found labels {}".format(
            sorted(df['binary_class'].unique().tolist())
        )
    )

# Feature matrix X: every column except the original and the derived target.
# Target vector y: the derived binary label.
X = df.drop(columns=['class', 'binary_class'])
y = df['binary_class']

# Step 2: Model Training
# ------------------------------------
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the logistic regression model. If a ConvergenceWarning is emitted here,
# raise max_iter (or standardise the features) before trusting the metrics.
log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train, y_train)

# Step 3: Evaluation
# ------------------------------------
# Predict the held-out labels.
y_pred = log_reg.predict(X_test)

# Overall accuracy plus per-class precision / recall / F1.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Logistic Regression Accuracy: {:.3f}".format(accuracy))
print("\nClassification Report:\n", report)


Logistic Regression Accuracy: 0.966

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.90      0.92      2492
           1       0.97      0.98      0.98      9108

    accuracy                           0.97     11600
   macro avg       0.96      0.94      0.95     11600
weighted avg       0.97      0.97      0.97     11600



In [1]:
# Using K-Nearest Neighbors

import numpy as np
import pandas as pd  # For data manipulation
from sklearn.datasets import fetch_openml  # To load the Shuttle dataset from OpenML
from sklearn.neighbors import KNeighborsClassifier  # The KNN classifier
from sklearn.model_selection import train_test_split  # For splitting the dataset
from sklearn.metrics import accuracy_score, classification_report  # For model evaluation
import warnings

# Silence only FutureWarning noise (library deprecation notices) rather than
# every warning, so genuine runtime problems remain visible in the output.
warnings.filterwarnings('ignore', category=FutureWarning)

# Step 1: Data Preparation
# ---------------------------
# Load the Shuttle dataset from OpenML (name: 'shuttle', version: 1).
shuttle = fetch_openml(name='shuttle', version=1, as_frame=True)
df = shuttle.frame

# Collapse the multi-class 'class' target into a binary label:
#   - class 1 (assumed by this analysis to be the safe condition) -> 1
#   - every other class (treated as unsafe)                       -> 0
# Depending on the scikit-learn version / parser, fetch_openml can return the
# class labels as strings ('1') or as integers (1). Normalising to str first
# makes the comparison work in both cases; comparing directly against '1'
# would silently label every row 0 when the labels arrive as integers.
df['binary_class'] = (df['class'].astype(str) == '1').astype(int)

# Fail loudly on a degenerate (single-class) target. KNN would otherwise fit
# without complaint and report a perfect but meaningless accuracy.
if df['binary_class'].nunique() != 2:
    raise ValueError(
        "Binary target must contain both classes; found labels {}".format(
            sorted(df['binary_class'].unique().tolist())
        )
    )

# Feature matrix X: every column except the original and the derived target.
# Target vector y: the derived binary label.
X = df.drop(columns=['class', 'binary_class'])
y = df['binary_class']

# Step 2: Model Training
# ---------------------------
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate the KNN classifier with k = 5 neighbours (a tunable hyperparameter).
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier on the training split.
knn.fit(X_train, y_train)

# Step 3: Evaluation
# ---------------------------
# Predict the held-out labels.
y_pred = knn.predict(X_test)

# Overall accuracy plus per-class precision / recall / F1.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("K-Nearest Neighbors (KNN) Classification Accuracy: {:.3f}".format(accuracy))
print("\nClassification Report:\n", report)


K-Nearest Neighbors (KNN) Classification Accuracy: 0.999

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2492
           1       1.00      1.00      1.00      9108

    accuracy                           1.00     11600
   macro avg       1.00      1.00      1.00     11600
weighted avg       1.00      1.00      1.00     11600

