<span style="color:blue; font-size:20px">RFE_SelectedFeatures_Classification.</span>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
def rfeFeature_with_names(indep_X, dep_Y, n):
    """
    Run Recursive Feature Elimination once per base estimator.

    Parameters:
        indep_X: feature table; must expose ``.columns`` because the names of
            the retained features are looked up through it.
        dep_Y: target values handed to ``RFE.fit``.
        n: number of features each RFE run keeps.

    Returns:
        A list with one dict per base estimator, in the order Logistic, SVC,
        RandomForest, DecisionTree. Each dict holds 'features_data' (the
        output of ``RFE.transform`` on ``indep_X``) and 'feature_names'
        (the retained column names as a list).
    """
    # Insertion order of this dict fixes the order of the returned list.
    base_estimators = {
        'Logistic': LogisticRegression(solver='lbfgs'),
        'SVC': SVC(kernel='linear', random_state=0),
        'RandomForest': RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
        'DecisionTree': DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0),
    }

    def _select_with(label, estimator):
        # Fit one RFE selector and package the reduced data with its column names.
        print(f"Applying RFE with {label} as base model...")
        selector = RFE(estimator, n_features_to_select=n).fit(indep_X, dep_Y)

        # support_ is the boolean mask of retained columns.
        kept_names = indep_X.columns[selector.support_].tolist()
        print(f"Selected features by {label}: {kept_names}")

        return {'features_data': selector.transform(indep_X), 'feature_names': kept_names}

    return [_select_with(label, estimator) for label, estimator in base_estimators.items()]

In [3]:
def split_scalar(indep_X, dep_Y):
    """
    Split into train/test partitions and standardise the features.

    25% of the rows are held out for testing (random_state=0). The scaler is
    fitted on the training features only and then applied to both partitions.

    Returns:
        (X_train, X_test, y_train, y_test) with the two feature arrays scaled
        and the two target partitions returned as produced by the split.
    """
    partitions = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    train_features, test_features, train_target, test_target = partitions

    # Fit on the training rows only so test statistics never leak into the scaler.
    scaler = StandardScaler().fit(train_features)

    return (
        scaler.transform(train_features),
        scaler.transform(test_features),
        train_target,
        test_target,
    )

def cm_prediction(classifier, X_test, y_test):
    """
    Predict on the test features and summarise the predictions.

    Parameters:
        classifier: an already fitted estimator exposing ``predict``.
        X_test: test features passed to ``classifier.predict``.
        y_test: true labels the predictions are compared against.

    Returns:
        (accuracy, report, cm): the accuracy score, the text classification
        report, and the confusion matrix, in that order.
    """
    predicted = classifier.predict(X_test)
    return (
        accuracy_score(y_test, predicted),
        classification_report(y_test, predicted),
        confusion_matrix(y_test, predicted),
    )

In [4]:
def get_accuracy_for_model(classifier_name, X_train, y_train, X_test, y_test):
    """
    Train the classifier identified by ``classifier_name`` and return its test accuracy.

    Parameters:
        classifier_name: one of 'Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie',
            'Decision', 'Random' (spelling of 'Navie' is the key used
            throughout this notebook).
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: held-out features and labels used for scoring.

    Returns:
        The accuracy reported by ``cm_prediction`` on the test partition.

    Raises:
        ValueError: if ``classifier_name`` is not one of the supported names.
            Previously this fell through with ``classifier = None`` and failed
            later with an unhelpful AttributeError on ``None.fit``.
    """
    # Factories are lazy so only the requested estimator is ever constructed.
    classifier_factories = {
        'Logistic': lambda: LogisticRegression(random_state=0),
        'SVMl': lambda: SVC(kernel='linear', random_state=0),
        'SVMnl': lambda: SVC(kernel='rbf', random_state=0),
        'KNN': lambda: KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2),
        'Navie': lambda: GaussianNB(),
        'Decision': lambda: DecisionTreeClassifier(criterion='entropy', random_state=0),
        'Random': lambda: RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
    }

    try:
        build_classifier = classifier_factories[classifier_name]
    except KeyError:
        raise ValueError(
            f"Unknown classifier_name {classifier_name!r}; "
            f"expected one of {list(classifier_factories)}"
        ) from None

    classifier = build_classifier()
    classifier.fit(X_train, y_train)
    accuracy, _, _ = cm_prediction(classifier, X_test, y_test)
    return accuracy



In [15]:
def rfe_classification(results, dep_Y):
    """
    Build a table of test accuracies for every classifier on every RFE feature set.

    Parameters:
        results: list of dicts as returned by ``rfeFeature_with_names``; each
            needs 'features_data' and 'feature_names'. Must contain exactly
            one entry per RFE base model (Logistic, SVC, RandomForest,
            DecisionTree), in that order, because the row labels are fixed.
        dep_Y: target values, split alongside each feature set.

    Returns:
        A DataFrame indexed by 'RFE_<base model>' with one column per
        classifier ('Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision',
        'Random') plus a 'Selected_Features' column listing the feature names
        each RFE run kept.

    Raises:
        ValueError: if ``results`` does not have one entry per row label.
    """
    index_labels = [f"RFE_{name}" for name in ['Logistic', 'SVC', 'RandomForest', 'DecisionTree']]
    classifier_names = ['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random']

    # Fail before any model is trained rather than midway through the loop.
    if len(results) != len(index_labels):
        raise ValueError(
            f"Expected {len(index_labels)} RFE results (one per {index_labels}), "
            f"got {len(results)}"
        )

    rfedataframe = pd.DataFrame(index=index_labels, columns=classifier_names)

    for row_label, result in zip(index_labels, results):
        X_train, X_test, y_train, y_test = split_scalar(result['features_data'], dep_Y)

        for classifier_name in classifier_names:
            accuracy = get_accuracy_for_model(classifier_name, X_train, y_train, X_test, y_test)
            # Single .loc write: the former `iloc[number][classifier_name] = ...`
            # was chained assignment, which writes to a temporary row and is a
            # silent no-op under pandas Copy-on-Write.
            rfedataframe.loc[row_label, classifier_name] = accuracy

    rfedataframe['Selected_Features'] = [r['feature_names'] for r in results]
    return rfedataframe

In [16]:
# Read the preprocessed data from prep.csv (path is relative to the notebook's working directory).
dataset1 = pd.read_csv("prep.csv", index_col=None)

# One-hot encode the categorical columns; drop_first=True omits the first level of each.
df2 = pd.get_dummies(dataset1, drop_first=True)

# 'classification_yes' is the target; every other encoded column is a predictor.
# NOTE(review): presumably the dummy of a 'classification' column with a 'yes' level — confirm against prep.csv.
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns=['classification_yes'])

In [17]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence convergence and future-deprecation warnings for the rest of the session.
# NOTE(review): ignoring every FutureWarning can also hide deprecation notices
# from pandas and scikit-learn; consider narrowing the filter with `module=`.
for _ignored_category in (ConvergenceWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [18]:
# Number of features every RFE run keeps.
N_FEATURES_TO_SELECT = 3
rfe_results = rfeFeature_with_names(indep_X, dep_Y, n=N_FEATURES_TO_SELECT)

Applying RFE with Logistic as base model...
Selected features by Logistic: ['sg_c', 'sg_d', 'htn_yes']
Applying RFE with SVC as base model...
Selected features by SVC: ['sg_d', 'dm_yes', 'appet_yes']
Applying RFE with RandomForest as base model...
Selected features by RandomForest: ['sc', 'hrmo', 'pcv']
Applying RFE with DecisionTree as base model...
Selected features by DecisionTree: ['hrmo', 'sg_c', 'dm_yes']


In [19]:
# Score all seven classifiers on each RFE-selected feature set
# (rows: RFE base model, columns: classifier accuracy on the test split).
result = rfe_classification(rfe_results, dep_Y)

In [20]:
# Bare last expression so the notebook renders the accuracy table.
result

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random,Selected_Features
RFE_Logistic,0.94,0.94,0.94,0.94,0.94,0.94,0.94,"[sg_c, sg_d, htn_yes]"
RFE_SVC,0.87,0.87,0.87,0.87,0.87,0.87,0.87,"[sg_d, dm_yes, appet_yes]"
RFE_RandomForest,0.94,0.94,0.94,0.94,0.9,0.91,0.92,"[sc, hrmo, pcv]"
RFE_DecisionTree,0.98,0.98,0.98,0.98,0.79,0.97,0.97,"[hrmo, sg_c, dm_yes]"


In [None]:
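The table above shows the test accuracy obtained by combining Recursive Feature Elimination (RFE) with several machine learning classifiers: each row is the base model used for RFE, and each column is a classifier trained on the three features that base model selected.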
Selected Features
The features selected by each RFE model are listed in the Selected_Features column:
RFE_Logistic: sg_c, sg_d, htn_yes
RFE_SVC: sg_d, dm_yes, appet_yes
RFE_RandomForest: sc, hrmo, pcv
RFE_DecisionTree: hrmo, sg_c, dm_yes