In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import (
    SelectKBest,
    f_classif,
    RFE,
    SelectFromModel
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset from the notebook's working directory.
data = pd.read_csv('/content/breast-cancer.csv')

# Predictors are every column except the target. An `id` column, if the CSV
# carries one (NOTE(review): common in public copies of this dataset --
# confirm), is a row identifier rather than a measurement, so it is dropped
# too; errors='ignore' makes this a no-op when the column is absent.
X = data.drop(columns=['diagnosis', 'id'], errors='ignore')

# Encode the target: malignant ('M') -> 1, benign ('B') -> 0.
y = data['diagnosis'].map({'M': 1, 'B': 0})

# Series.map yields NaN for any value missing from the mapping; fail here
# with a clear message instead of inside a downstream estimator.
if y.isna().any():
    unexpected = sorted(data.loc[y.isna(), 'diagnosis'].astype(str).unique())
    raise ValueError(f"Unexpected diagnosis labels: {unexpected}")

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the test rows so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Filter method: score each feature with the ANOVA F-test (f_classif) and
# keep the five highest-scoring ones.
selector_filter = SelectKBest(f_classif, k=5)
selector_filter.fit(X_train_scaled, y_train)
X_train_filter = selector_filter.transform(X_train_scaled)
X_test_filter = selector_filter.transform(X_test_scaled)

# Translate the selected column positions back to the DataFrame's column names.
filter_feature_names = list(X.columns[selector_filter.get_support(indices=True)])
print("Filter Method - Selected Features:", filter_feature_names)


# Wrapper method: recursive feature elimination around a logistic-regression
# estimator, stopping once five features remain.
selector_wrapper = RFE(LogisticRegression(), n_features_to_select=5)
selector_wrapper.fit(X_train_scaled, y_train)
X_train_wrapper = selector_wrapper.transform(X_train_scaled)
X_test_wrapper = selector_wrapper.transform(X_test_scaled)

# get_support() returns the same boolean mask that RFE stores in `support_`.
wrapper_feature_names = list(X.columns[selector_wrapper.get_support()])
print("Wrapper Method - Selected Features:", wrapper_feature_names)


# Embedded method: rank features by random-forest importance and keep the
# top five.
#  - random_state pins the forest so the selected features are reproducible,
#    matching the seed used elsewhere in this script.
#  - threshold=-np.inf disables the default importance cut-off so that
#    max_features alone decides the selection; with the default threshold
#    the selector may return fewer than five features.
selector_embedded = SelectFromModel(
    RandomForestClassifier(random_state=42),
    prefit=False,
    threshold=-np.inf,
    max_features=5,
)
X_train_embedded = selector_embedded.fit_transform(X_train_scaled, y_train)
X_test_embedded = selector_embedded.transform(X_test_scaled)

# Map the boolean support mask back to the DataFrame's column names.
embedded_feature_names = X.columns[selector_embedded.get_support()].tolist()
print("Embedded Method - Selected Features:", embedded_feature_names)


# Train the final classifier on the features chosen by the embedded selector
# (fit() returns the estimator itself, so the call can be chained).
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_embedded, y_train
)

# Score the held-out rows and report overall accuracy plus per-class metrics.
y_pred = clf.predict(X_test_embedded)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

Filter Method - Selected Features: ['perimeter_mean', 'concave points_mean', 'radius_worst', 'perimeter_worst', 'concave points_worst']
Wrapper Method - Selected Features: ['radius_se', 'radius_worst', 'texture_worst', 'area_worst', 'concave points_worst']
Embedded Method - Selected Features: ['concave points_mean', 'radius_worst', 'perimeter_worst', 'area_worst', 'concave points_worst']

Model Accuracy: 95.61%

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

