# Feature selection based on accuracy

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

In [2]:
# Create custom dataset
X, y = make_classification(n_samples=800, n_features=10, n_informative=5, n_redundant=0, random_state=90)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# Forward Feature Selection

In [4]:
X_train.shape[1]

10

In [5]:
y

array([0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,

In [6]:
X_train.shape[1]

10

In [7]:
# Implement forward feature selection
selected_features = []
for i in range(X_train.shape[1]):
    best_accuracy = 0
    best_feature = None
    for j in range(X_train.shape[1]):
#         print(j)
        if j not in selected_features:
            features = selected_features + [j] # "+" for temporary append
#             print(features)
            model = LogisticRegression()
            model.fit(X_train[:, features], y_train)
            accuracy = model.score(X_test[:, features], y_test)
#             print(accuracy)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature = j
    selected_features.append(best_feature)
    print("Selected Features (Forward):", selected_features, "Score:", best_accuracy)

Selected Features (Forward): [0] Score: 0.70625
Selected Features (Forward): [0, 6] Score: 0.7375
Selected Features (Forward): [0, 6, 5] Score: 0.80625
Selected Features (Forward): [0, 6, 5, 8] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1, 7] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1, 7, 2] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1, 7, 2, 3] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1, 7, 2, 3, 4] Score: 0.81875
Selected Features (Forward): [0, 6, 5, 8, 1, 7, 2, 3, 4, 9] Score: 0.81875


In [8]:
# Backward Feature Selection

In [9]:
range(X_train.shape[1])

range(0, 10)

In [10]:
list(range(0,10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [11]:
# Implement backward feature elimination
selected_features = list(range(X_train.shape[1]))
model = LogisticRegression()
model.fit(X_train[:, selected_features], y_train)
accuracy = model.score(X_test[:, selected_features], y_test)
print("Selected Features (Backward):", selected_features, "Score:", accuracy)

for i in range(X_train.shape[1] - 1):
    worst_accuracy = 1
    worst_feature = None
    for j in selected_features:
#         print(j)
        features = selected_features.copy()
#         print(features)
        features.remove(j)
        model = LogisticRegression()
        model.fit(X_train[:, features], y_train)
        accuracy = model.score(X_test[:, features], y_test)
#         print(accuracy)
        if accuracy < worst_accuracy:
            worst_accuracy = accuracy
            worst_feature = j
    selected_features.remove(worst_feature)
    print("Selected Features (Backward):", selected_features, "Score:", worst_accuracy)

Selected Features (Backward): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Score: 0.81875
Selected Features (Backward): [1, 2, 3, 4, 5, 6, 7, 8, 9] Score: 0.725
Selected Features (Backward): [1, 2, 3, 4, 5, 6, 8, 9] Score: 0.6625
Selected Features (Backward): [1, 2, 3, 4, 6, 8, 9] Score: 0.525
Selected Features (Backward): [1, 2, 3, 4, 6, 9] Score: 0.4375
Selected Features (Backward): [1, 3, 4, 6, 9] Score: 0.3875
Selected Features (Backward): [3, 4, 6, 9] Score: 0.38125
Selected Features (Backward): [3, 4, 6] Score: 0.39375
Selected Features (Backward): [4, 6] Score: 0.41875
Selected Features (Backward): [4] Score: 0.4125
