In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.base import clone
from sklearn.feature_selection import RFE
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from tqdm import tqdm
import xgboost as xgb
from catboost import CatBoostClassifier

In [None]:
# Location of the input CSV. The literal is empty in this notebook, so it has to be
# filled in before the cell can run.
data_path = r''

# Read the raw table and preview its first rows as the cell output.
df = pd.read_csv(data_path)
df.head()

In [3]:
# Feature matrix: every column except the target column 'class'.
X = df.drop('class', axis=1)

# Encode the target labels as integers; the fitted encoder is kept so the
# original labels can be recovered with inverse_transform.
lable_encoder = LabelEncoder()
y = lable_encoder.fit_transform(df['class'])

# Standardise each feature column.
# NOTE(review): the scaler is fit on all rows here, before the train/test split that
# happens later in the notebook -- confirm that this is intended.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [4]:
# Candidate models, keyed by display name. These instances act as unfitted
# templates; insertion order determines the order in which they are iterated.
seeded = dict(random_state=42)  # shared seed for every estimator that accepts one

classifiers = dict([
    ("KNN", KNeighborsClassifier()),
    ("SVM", SVC()),
    ("Random Forest", RandomForestClassifier(n_estimators=300, **seeded)),
    ("Logistic Regression", LogisticRegression(max_iter=1000, **seeded)),
    ("XGBoost", xgb.XGBClassifier(eval_metric='logloss', **seeded)),
    ("CatBoost", CatBoostClassifier(verbose=0, **seeded)),
    ("Extra Trees", ExtraTreesClassifier(n_estimators=300, **seeded)),
    ("Decision Tree", DecisionTreeClassifier(**seeded)),
])

## Recursive Feature Elimination (RFE)

In [None]:
# Recursive feature elimination driven by a random forest, keeping 80 features.
clf_for_rfe = RandomForestClassifier(n_estimators=300, random_state=42)
rfe = RFE(estimator=clf_for_rfe, n_features_to_select=80)
rfe.fit(X_scaled, y)

# Boolean mask over the input columns -> names of the retained features.
selected_features_RFE = X.columns[rfe.get_support()]
# Per-column rank assigned by the selector, in the same order as X.columns.
ranking_features_RFE = rfe.ranking_

print("Selected features: ", selected_features_RFE)

# Reduced (already scaled) feature matrix, labelled with the retained column names.
reduced_values = rfe.transform(X_scaled)
X_filtered = pd.DataFrame(reduced_values, columns=selected_features_RFE)

In [None]:
# Pair every input column with the rank RFE assigned to it
# (scikit-learn gives rank 1 to the features it selected).
features_ranking = pd.DataFrame({
    'Feature': X.columns,
    'Ranking': ranking_features_RFE
})

# Ascending by rank, i.e. best first. A stable sort keeps features that share a
# rank in their original column order.
features_ranking_sorted = features_ranking.sort_values(by='Ranking', kind='stable')

# Rank-1 subset indexed by feature name. Built as a chained expression so no
# filtered slice is mutated in place and the cell stays idempotent on re-run.
features_ranking_selected = (
    features_ranking_sorted
    .loc[features_ranking_sorted['Ranking'] == 1]
    .set_index('Feature')
)

print("Selected features (ranking == 1):")
print(features_ranking_selected.index)

print("Feature ranking:\n", features_ranking_sorted)

# Only the final expression of a cell is rendered as rich output, so the table
# goes last (mid-cell it was silently discarded).
features_ranking_selected

In [None]:
# Score every classifier on the RFE-selected features with repeated hold-out splits.
# `results` maps classifier name -> (min accuracy, max accuracy, mean accuracy,
# mean weighted F1) over the repeats.
#
# NOTE(review): X_filtered comes from a scaler and an RFE selector that were both fit
# on all rows, so the test partitions below were already seen during preprocessing;
# the scores may be optimistically biased -- confirm whether that is acceptable.
N_REPEATS = 1    # number of independent train/test splits per classifier
TEST_SIZE = 0.3  # fraction of rows held out for testing in each split

results = {}

for clf_name, clf in classifiers.items():
    accuracies = []
    f1_scores = []

    # The repeat index doubles as the split seed: results are reproducible on re-run,
    # and every classifier is evaluated on exactly the same partitions, so score
    # differences reflect the models rather than the luck of the split.
    for split_seed in tqdm(range(N_REPEATS), desc=clf_name):
        X_train, X_test, y_train, y_test = train_test_split(
            X_filtered, y, test_size=TEST_SIZE, random_state=split_seed, shuffle=True
        )

        # clone() yields a fresh unfitted copy, leaving the template in `classifiers` untouched.
        clf_clone_train = clone(clf)
        clf_clone_train.fit(X_train, y_train)

        y_pred = clf_clone_train.predict(X_test)

        accuracies.append(accuracy_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred, average='weighted'))

    min_accuracy = np.min(accuracies)
    max_accuracy = np.max(accuracies)
    avg_accuracy = np.mean(accuracies)
    avg_f1_score = np.mean(f1_scores)

    results[clf_name] = (min_accuracy, max_accuracy, avg_accuracy, avg_f1_score)

    print(f'{clf_name} - Min Accuracy: {min_accuracy:.4f}, Max Accuracy: {max_accuracy:.4f}, Average Accuracy: {avg_accuracy:.4f}, Average F1 Score: {avg_f1_score:.4f}')