In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score

# Read the prepared CSV and encode it with pd.get_dummies
# (drop_first=True drops the first level of each encoded column).
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Separate the dependent column from the independent columns
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns=['classification_yes'])

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    indep_X,
    dep_Y,
    test_size=0.25,
    random_state=0,
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Sequential forward feature selection, run once per classifier.
# All four selectors use identical search settings, so the shared steps
# (build selector -> fit -> read mask -> print) live in one helper.
def _run_forward_selection(estimator, label, X, y, feature_names,
                           n_features_to_select=5, cv=5):
    """Fit a forward SequentialFeatureSelector for ``estimator`` and report the result.

    Parameters
    ----------
    estimator : the classifier handed to SequentialFeatureSelector.
    label : str, human-readable model name used in the printed summary line.
    X, y : training features and target passed to the selector's ``fit``.
    feature_names : object indexed with the support mask to produce the
        printed names (the callers below pass ``indep_X.columns``).
    n_features_to_select, cv : forwarded unchanged to SequentialFeatureSelector.

    Returns
    -------
    tuple ``(selector, support_mask)`` -- the fitted selector and the value
    returned by its ``get_support()``.
    """
    selector = SequentialFeatureSelector(
        estimator,
        n_features_to_select=n_features_to_select,
        direction='forward',
        scoring='accuracy',
        cv=cv,
    )
    # fit() returns the selector itself; rebinding mirrors the usual sklearn idiom
    selector = selector.fit(X, y)
    support_mask = selector.get_support()
    print(f"Selected features for {label}: {feature_names[support_mask]}")
    return selector, support_mask


# Logistic Regression Sequential Feature Selector
log_model = LogisticRegression(solver='lbfgs', random_state=0)
sfs_log, log_selected_features = _run_forward_selection(
    log_model, "Logistic Regression", X_train, y_train, indep_X.columns
)

# SVM Linear Sequential Feature Selector
svc_model = SVC(kernel='linear', random_state=0)
sfs_svc, svc_selected_features = _run_forward_selection(
    svc_model, "SVM Linear", X_train, y_train, indep_X.columns
)

# Decision Tree Sequential Feature Selector
dt_model = DecisionTreeClassifier(random_state=0)
sfs_dt, dt_selected_features = _run_forward_selection(
    dt_model, "Decision Tree", X_train, y_train, indep_X.columns
)

# Random Forest Sequential Feature Selector
rf_model = RandomForestClassifier(random_state=0)
sfs_rf, rf_selected_features = _run_forward_selection(
    rf_model, "Random Forest", X_train, y_train, indep_X.columns
)

# Function to evaluate model with selected features
def evaluate_model(model, X_train, y_train, X_test, y_test, selected_features):
    """Train ``model`` on a subset of columns and report its test accuracy.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
        in place by this call.
    X_train, X_test : 2-D arrays indexed as ``X[:, selected_features]``.
    y_train, y_test : targets for the corresponding splits.
    selected_features : column selector applied to both feature arrays.

    Returns
    -------
    The value returned by ``accuracy_score(y_test, predictions)``; the same
    figure is also printed as a percentage with two decimals.
    """
    # Restrict both splits to the chosen columns
    train_subset = X_train[:, selected_features]
    test_subset = X_test[:, selected_features]

    # Train on the reduced training split, then score predictions on the reduced test split
    model.fit(train_subset, y_train)
    score = accuracy_score(y_test, model.predict(test_subset))

    print(f"Model Accuracy: {score * 100:.2f}%")
    return score

# All four evaluations share the same scaled train/test split
eval_splits = (X_train, y_train, X_test, y_test)

# Logistic Regression on its own selected columns
evaluate_model(log_model, *eval_splits, log_selected_features)

# Linear SVM on its own selected columns
evaluate_model(svc_model, *eval_splits, svc_selected_features)

# Decision Tree on its own selected columns
evaluate_model(dt_model, *eval_splits, dt_selected_features)

# Random Forest on its own selected columns (kept as the cell's last expression)
evaluate_model(rf_model, *eval_splits, rf_selected_features)


Selected features for Logistic Regression: Index(['al', 'sc', 'hrmo', 'sg_c', 'dm_yes'], dtype='object')
Selected features for SVM Linear: Index(['al', 'hrmo', 'rc', 'sg_c', 'sg_d'], dtype='object')
Selected features for Decision Tree: Index(['su', 'rc', 'sg_c', 'sg_d', 'dm_yes'], dtype='object')
Selected features for Random Forest: Index(['al', 'rc', 'sg_b', 'pcc_present', 'dm_yes'], dtype='object')
Model Accuracy: 96.00%
Model Accuracy: 97.00%
Model Accuracy: 97.00%
Model Accuracy: 96.00%


0.96