In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_selection import SelectKBest, chi2, RFE
from sklearn.linear_model import Lasso, LogisticRegression

# Load the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv"
data = pd.read_csv(url, header=None)

# Assign column names
data.columns = [f"V{i}" for i in range(1, 61)] + ["Target"]

# Encode target variable
data['Target'] = data['Target'].map({'R': 0, 'M': 1})

# Split features and target variable
X = data.drop('Target', axis=1)
y = data['Target']

# Standardize features using MinMaxScaler for chi2 compatibility
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initial Model
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Evaluate the initial model
print("Initial Model Performance")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-Score:", f1_score(y_test, y_pred))

# Filter Method - SelectKBest
filter_selector = SelectKBest(chi2, k=10).fit(X_train, y_train)
X_train_filter = filter_selector.transform(X_train)
X_test_filter = filter_selector.transform(X_test)

# Train and evaluate KNN model with filter method
knn.fit(X_train_filter, y_train)
y_pred_filter = knn.predict(X_test_filter)

print("Filter Method Performance")
print("Accuracy:", accuracy_score(y_test, y_pred_filter))
print("Precision:", precision_score(y_test, y_pred_filter))
print("Recall:", recall_score(y_test, y_pred_filter))
print("F1-Score:", f1_score(y_test, y_pred_filter))

# Wrapper Method - RFE with Logistic Regression
logreg = LogisticRegression(max_iter=1000)
rfe_selector = RFE(logreg, n_features_to_select=10).fit(X_train, y_train)
X_train_wrapper = rfe_selector.transform(X_train)
X_test_wrapper = rfe_selector.transform(X_test)

# Train and evaluate KNN model with wrapper method
knn.fit(X_train_wrapper, y_train)
y_pred_wrapper = knn.predict(X_test_wrapper)

print("Wrapper Method Performance")
print("Accuracy:", accuracy_score(y_test, y_pred_wrapper))
print("Precision:", precision_score(y_test, y_pred_wrapper))
print("Recall:", recall_score(y_test, y_pred_wrapper))
print("F1-Score:", f1_score(y_test, y_pred_wrapper))

# Embedded Method - Lasso
lasso = Lasso(alpha=0.01).fit(X_train, y_train)
lasso_selector = np.abs(lasso.coef_) > 0
X_train_embedded = X_train[:, lasso_selector]
X_test_embedded = X_test[:, lasso_selector]

# Train and evaluate KNN model with embedded method
knn.fit(X_train_embedded, y_train)
y_pred_embedded = knn.predict(X_test_embedded)

print("Embedded Method Performance")
print("Accuracy:", accuracy_score(y_test, y_pred_embedded))
print("Precision:", precision_score(y_test, y_pred_embedded))
print("Recall:", recall_score(y_test, y_pred_embedded))
print("F1-Score:", f1_score(y_test, y_pred_embedded))

# Summary of Findings
initial_performance = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Precision": precision_score(y_test, y_pred),
    "Recall": recall_score(y_test, y_pred),
    "F1-Score": f1_score(y_test, y_pred)
}

filter_performance = {
    "Accuracy": accuracy_score(y_test, y_pred_filter),
    "Precision": precision_score(y_test, y_pred_filter),
    "Recall": recall_score(y_test, y_pred_filter),
    "F1-Score": f1_score(y_test, y_pred_filter)
}

wrapper_performance = {
    "Accuracy": accuracy_score(y_test, y_pred_wrapper),
    "Precision": precision_score(y_test, y_pred_wrapper),
    "Recall": recall_score(y_test, y_pred_wrapper),
    "F1-Score": f1_score(y_test, y_pred_wrapper)
}

embedded_performance = {
    "Accuracy": accuracy_score(y_test, y_pred_embedded),
    "Precision": precision_score(y_test, y_pred_embedded),
    "Recall": recall_score(y_test, y_pred_embedded),
    "F1-Score": f1_score(y_test, y_pred_embedded)
}

print("Initial Model Performance:", initial_performance)
print("Filter Method Performance:", filter_performance)
print("Wrapper Method Performance:", wrapper_performance)
print("Embedded Method Performance:", embedded_performance)


Initial Model Performance
Accuracy: 0.8571428571428571
Precision: 0.9545454545454546
Recall: 0.8076923076923077
F1-Score: 0.875
Filter Method Performance
Accuracy: 0.7857142857142857
Precision: 0.8695652173913043
Recall: 0.7692307692307693
F1-Score: 0.8163265306122449
Wrapper Method Performance
Accuracy: 0.8571428571428571
Precision: 1.0
Recall: 0.7692307692307693
F1-Score: 0.8695652173913044
Embedded Method Performance
Accuracy: 0.8333333333333334
Precision: 1.0
Recall: 0.7307692307692307
F1-Score: 0.8444444444444443
Initial Model Performance: {'Accuracy': 0.8571428571428571, 'Precision': 0.9545454545454546, 'Recall': 0.8076923076923077, 'F1-Score': 0.875}
Filter Method Performance: {'Accuracy': 0.7857142857142857, 'Precision': 0.8695652173913043, 'Recall': 0.7692307692307693, 'F1-Score': 0.8163265306122449}
Wrapper Method Performance: {'Accuracy': 0.8571428571428571, 'Precision': 1.0, 'Recall': 0.7692307692307693, 'F1-Score': 0.8695652173913044}
Embedded Method Performance: {'Accurac