In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score

# Read the preprocessed data and one-hot encode every categorical column,
# dropping the first level of each so the dummies are not collinear.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Target is the encoded 'classification_yes' column; everything else is a feature.
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns='classification_yes')

# Hold out 25% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    indep_X,
    dep_Y,
    test_size=0.25,
    random_state=0,
)

# Learn the scaling statistics on the training split only, then apply the
# same transformation to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Function to perform feature selection and model evaluation
def evaluate_model_with_selection(model, X_train, y_train, X_test, y_test, direction='forward', n_features=4, feature_names=None):
    """Select features sequentially, refit ``model`` on them and report test accuracy.

    A ``SequentialFeatureSelector`` (5-fold CV, accuracy scoring) picks
    ``n_features`` columns of ``X_train``. ``model`` is then fitted in place
    on the reduced training data and scored on the reduced test data.

    Args:
        model: Estimator passed to the selector and fitted on the selection.
        X_train: Training feature matrix (array or DataFrame).
        y_train: Training labels.
        X_test: Test feature matrix with the same columns as ``X_train``.
        y_test: Test labels.
        direction: ``'forward'`` or ``'backward'`` search direction.
        n_features: Number of features the selector should keep.
        feature_names: Optional column labels, one per column of ``X_train``,
            used only for the printed report. When omitted, the labels are
            taken from ``X_train.columns`` if present, otherwise from the
            module-level ``indep_X`` frame if one exists, otherwise the
            positional indices of the selected columns are printed.

    Returns:
        The accuracy (fraction in ``[0, 1]``) of ``model`` on the test split.
    """
    sfs = SequentialFeatureSelector(
        model,
        n_features_to_select=n_features,
        direction=direction,
        scoring='accuracy',
        cv=5,
    )
    sfs.fit(X_train, y_train)

    # Boolean mask with one entry per input column.
    selected_features = sfs.get_support()

    # Resolve labels for the report without requiring a particular global.
    if feature_names is None:
        if hasattr(X_train, 'columns'):
            feature_names = X_train.columns
        else:
            # Legacy fallback: existing call sites pass scaled NumPy arrays and
            # rely on the module-level ``indep_X`` frame for the column names.
            legacy_frame = globals().get('indep_X')
            if legacy_frame is not None:
                feature_names = legacy_frame.columns

    if feature_names is not None and len(feature_names) == len(selected_features):
        selected_labels = pd.Index(feature_names)[selected_features]
    else:
        # No usable labels: report the positional indices instead of failing.
        selected_labels = sfs.get_support(indices=True)
    print(f"Selected features for {model.__class__.__name__} ({direction}): {selected_labels}")

    # Let the selector reduce the matrices; unlike ``X[:, mask]`` this works
    # for both NumPy arrays and DataFrames.
    X_train_selected = sfs.transform(X_train)
    X_test_selected = sfs.transform(X_test)

    # Fit the model on the reduced data and score it on the held-out split.
    model.fit(X_train_selected, y_train)
    y_pred = model.predict(X_test_selected)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy ({direction}): {accuracy * 100:.2f}%")
    return accuracy

# Estimators that drive the sequential feature search (seeded for repeatability)
log_model = LogisticRegression(solver='lbfgs', random_state=0)
svc_model = SVC(kernel='linear', random_state=0)
dt_model = DecisionTreeClassifier(random_state=0)
rf_model = RandomForestClassifier(random_state=0)

print("Forward & Backward Selection Results:")

# Run every estimator through a forward pass and then a backward pass,
# in the order: logistic regression, linear SVM, decision tree, random forest.
for estimator in (log_model, svc_model, dt_model, rf_model):
    for search_direction in ('forward', 'backward'):
        evaluate_model_with_selection(
            estimator,
            X_train,
            y_train,
            X_test,
            y_test,
            direction=search_direction,
            n_features=4,
        )


Forward & Backward Selection Results:
Selected features for LogisticRegression (forward): Index(['al', 'hrmo', 'sg_c', 'dm_yes'], dtype='object')
Model Accuracy (forward): 95.00%
Selected features for LogisticRegression (backward): Index(['su', 'hrmo', 'sg_c', 'sg_d'], dtype='object')
Model Accuracy (backward): 99.00%
Selected features for SVC (forward): Index(['al', 'hrmo', 'rc', 'sg_d'], dtype='object')
Model Accuracy (forward): 97.00%
Selected features for SVC (backward): Index(['sc', 'hrmo', 'sg_c', 'dm_yes'], dtype='object')
Model Accuracy (backward): 98.00%
Selected features for DecisionTreeClassifier (forward): Index(['su', 'rc', 'sg_c', 'dm_yes'], dtype='object')
Model Accuracy (forward): 95.00%
Selected features for DecisionTreeClassifier (backward): Index(['al', 'sc', 'sg_c', 'sg_d'], dtype='object')
Model Accuracy (backward): 99.00%
Selected features for RandomForestClassifier (forward): Index(['al', 'rc', 'sg_b', 'dm_yes'], dtype='object')
Model Accuracy (forward): 95.00%
Selected fea

0.99