In [None]:
!pip --q install niapy

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/181.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m174.1/181.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.9/181.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd

In [None]:
# Location of the CSV export that holds the full dataset.
data_path = '/content/allData800 final (1).csv'
# Parse the CSV into the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from niapy.problems import Problem
from niapy.task import Task
from niapy.algorithms.basic import ParticleSwarmOptimization

In [None]:
# Feature matrix: every column except the target, as a NumPy array.
feature_frame = df.drop(columns=['classification'])
X = feature_frame.values

# Binary target: 'ckd' -> 1, 'notckd' -> 0.
label_codes = df['classification'].map({'ckd': 1, 'notckd': 0})
# Series.map turns every label that is missing from the mapping (stray
# whitespace, different casing, missing values, ...) into NaN. Fail loudly
# here instead of letting NaN targets leak into the split and the models.
if label_codes.isna().any():
    unexpected_labels = df.loc[label_codes.isna(), 'classification'].unique().tolist()
    raise ValueError(f"Unmapped values in 'classification' column: {unexpected_labels}")
y = label_codes.values

In [None]:
class FeatureSelectionProblem(Problem):
    """Wrapper-style feature-selection objective for NiaPy optimizers.

    A candidate solution is a real vector with one component per column of
    ``X_train``, bounded to ``[0, 1]``; a component greater than 0.5 marks the
    corresponding column as selected. The fitness to be minimized is::

        alpha * (1 - cv_accuracy) + (1 - alpha) * (n_selected / n_features)

    With the default ``alpha=0.001`` the subset-size term carries almost all
    of the weight, so the search is driven mainly towards the smallest
    admissible subset (``min_features`` columns) and the classifier's error
    acts as little more than a tie-breaker.
    NOTE(review): confirm that this weighting is intended.

    Args:
        X_train: 2-D array of training samples (rows) by features (columns).
        y_train: Training labels passed unchanged to ``cross_val_score``.
        classifier: Estimator evaluated by ``cross_val_score`` on each subset.
        alpha: Weight of the classification error in the fitness; the
            subset-size term receives ``1 - alpha``.
        min_features: Smallest number of selected columns that is evaluated;
            smaller subsets get the penalty fitness. Values below 1 are
            raised to 1 so an empty column selection is never cross-validated.
        cv: Value forwarded as ``cv`` to ``cross_val_score``.
    """

    def __init__(self, X_train, y_train, classifier, alpha=0.001, min_features=4, cv=2):
        super().__init__(dimension=X_train.shape[1], lower=0, upper=1)
        self.X_train = X_train
        self.y_train = y_train
        self.classifier = classifier
        self.alpha = alpha
        # An empty column selection cannot be cross-validated, so never allow
        # the threshold to drop below one feature.
        self.min_features = max(1, min_features)
        self.cv = cv

    def _evaluate(self, x):
        """Return the fitness of candidate ``x`` (lower is better)."""
        selected = x > 0.5
        num_selected = selected.sum()
        if num_selected < self.min_features:
            # Penalty for subsets that are too small. For alpha in [0, 1] a
            # regular fitness never exceeds 1.0, so this is the worst value.
            return 1.0
        accuracy = cross_val_score(
            self.classifier,
            self.X_train[:, selected],
            self.y_train,
            cv=self.cv,
            n_jobs=-1,
        ).mean()
        score = 1 - accuracy
        num_features = self.X_train.shape[1]
        return self.alpha * score + (1 - self.alpha) * (num_selected / num_features)


In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin

class ClassifierWrapper(BaseEstimator, ClassifierMixin):
    """Thin estimator that forwards ``fit``, ``predict`` and ``score`` to a wrapped classifier."""

    def __init__(self, classifier):
        # The wrapped estimator; every method below delegates to it.
        self.classifier = classifier

    def fit(self, X, y):
        """Fit the wrapped classifier on ``(X, y)`` and return this wrapper."""
        wrapped = self.classifier
        wrapped.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        predictions = self.classifier.predict(X)
        return predictions

    def score(self, X, y):
        """Return the wrapped classifier's own ``score`` on ``(X, y)``."""
        result = self.classifier.score(X, y)
        return result

In [None]:
# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1234,
)

In [None]:
# PSO-driven feature selection with an SVM as the wrapped classifier.
svm_classifier = ClassifierWrapper(classifier=SVC())
svm_problem = FeatureSelectionProblem(
    X_train=X_train,
    y_train=y_train,
    classifier=svm_classifier,
)
# Stop after 100 iterations of a 10-particle swarm with a fixed seed.
svm_task = Task(problem=svm_problem, max_iters=100)
svm_algorithm = ParticleSwarmOptimization(population_size=10, seed=1234)
svm_best_features, svm_best_fitness = svm_algorithm.run(task=svm_task)

In [None]:
# Threshold the best PSO position at 0.5 (the same cut-off used inside the
# fitness function) to obtain the boolean feature mask for SVM.
svm_selected_features = svm_best_features > 0.5
svm_selected_count = svm_selected_features.sum()
print('SVM - Number of selected features:', svm_selected_count)

SVM - Number of selected features: 4


In [None]:
# Create and run the PSO algorithm for MLP.
# The MLP is seeded so that the cross-validated fitness of a given feature
# subset is the same every time it is evaluated; without a seed the random
# weight initialisation makes the objective noisy and the run unrepeatable.
mlp_classifier = ClassifierWrapper(MLPClassifier(random_state=1234))
mlp_problem = FeatureSelectionProblem(X_train, y_train, mlp_classifier)
mlp_task = Task(mlp_problem, max_iters=100)
mlp_algorithm = ParticleSwarmOptimization(population_size=10, seed=1234)
mlp_best_features, mlp_best_fitness = mlp_algorithm.run(mlp_task)


In [None]:
# Threshold the best PSO position at 0.5 (the same cut-off used inside the
# fitness function) to obtain the boolean feature mask for MLP.
mlp_selected_features = mlp_best_features > 0.5
mlp_selected_count = mlp_selected_features.sum()
print('MLP - Number of selected features:', mlp_selected_count)


MLP - Number of selected features: 4


In [None]:
# Create and run the PSO algorithm for Random Forest.
# The forest is seeded so that the cross-validated fitness of a given feature
# subset is the same every time it is evaluated; without a seed the random
# bootstrapping makes the objective noisy and the run unrepeatable.
rf_classifier = ClassifierWrapper(RandomForestClassifier(random_state=1234))
rf_problem = FeatureSelectionProblem(X_train, y_train, rf_classifier)
rf_task = Task(rf_problem, max_iters=100)
rf_algorithm = ParticleSwarmOptimization(population_size=10, seed=1234)
rf_best_features, rf_best_fitness = rf_algorithm.run(rf_task)


In [None]:
# Threshold the best PSO position at 0.5 (the same cut-off used inside the
# fitness function) to obtain the boolean feature mask for Random Forest.
rf_selected_features = rf_best_features > 0.5
rf_selected_count = rf_selected_features.sum()
print('Random Forest - Number of selected features:', rf_selected_count)


Random Forest - Number of selected features: 4


In [None]:
# PSO-driven feature selection with XGBoost as the wrapped classifier.
xgb_classifier = ClassifierWrapper(classifier=XGBClassifier())
xgb_problem = FeatureSelectionProblem(
    X_train=X_train,
    y_train=y_train,
    classifier=xgb_classifier,
)
# Stop after 100 iterations of a 10-particle swarm with a fixed seed.
xgb_task = Task(problem=xgb_problem, max_iters=100)
xgb_algorithm = ParticleSwarmOptimization(population_size=10, seed=1234)
xgb_best_features, xgb_best_fitness = xgb_algorithm.run(task=xgb_task)


In [None]:
# Threshold the best PSO position at 0.5 (the same cut-off used inside the
# fitness function) to obtain the boolean feature mask for XGBoost.
xgb_selected_features = xgb_best_features > 0.5
xgb_selected_count = xgb_selected_features.sum()
print('XGBoost - Number of selected features:', xgb_selected_count)


XGBoost - Number of selected features: 4


In [None]:
# Translate each model's boolean mask into the matching column names.
feature_names = df.drop(columns=['classification']).columns

# Every list is indexed with its OWN model's mask. The MLP list previously
# reused the SVM mask, which hid the MLP result and made the SVM subset count
# twice in the later vote.
svm_selected_feature_names = feature_names[svm_selected_features].tolist()
mlp_selected_feature_names = feature_names[mlp_selected_features].tolist()
rf_selected_feature_names = feature_names[rf_selected_features].tolist()
xgb_selected_feature_names = feature_names[xgb_selected_features].tolist()

# Print the selected feature names for SVM
print('SVM - Selected features:', ', '.join(svm_selected_feature_names))
# Print the selected feature names for MLP
print('MLP - Selected features:', ', '.join(mlp_selected_feature_names))
# Print the selected feature names for Random Forest
print('Random Forest - Selected features:', ', '.join(rf_selected_feature_names))
# Print the selected feature names for XGBoost
print('XGBoost - Selected features:', ', '.join(xgb_selected_feature_names))

SVM - Selected features: vol, pot, glob, ane
MLP - Selected features: vol, pot, glob, ane
Random Forest - Selected features: sg, chlo, bun, ane
XGBoost - Selected features: sg, chlo, bun, ane


In [None]:
# Display the list of column names selected for the SVM.
svm_selected_feature_names

['vol', 'pot', 'glob', 'ane']

In [None]:
# Display the list of column names stored for the MLP.
mlp_selected_feature_names

['vol', 'pot', 'glob', 'ane']

In [None]:
# Display the list of column names selected for the Random Forest.
rf_selected_feature_names

['sg', 'chlo', 'bun', 'ane']

In [None]:
# Display the list of column names selected for XGBoost.
xgb_selected_feature_names

['sg', 'chlo', 'bun', 'ane']

In [None]:
# Concatenate the four per-model name lists into one flat list. Names chosen
# by several models appear several times (the next cell tallies them).
all_features = [
    *svm_selected_feature_names,
    *mlp_selected_feature_names,
    *rf_selected_feature_names,
    *xgb_selected_feature_names,
]

all_features

['vol',
 'pot',
 'glob',
 'ane',
 'vol',
 'pot',
 'glob',
 'ane',
 'sg',
 'chlo',
 'bun',
 'ane',
 'sg',
 'chlo',
 'bun',
 'ane']

In [None]:
from collections import Counter


# Tally how many of the per-model lists contain each feature name.
feature_counts = Counter(all_features)
# Keep the names that occur at least twice, in first-seen order.
final_selected_features = [
    name for name in feature_counts if feature_counts[name] >= 2
]

print("Final Selected Features:", final_selected_features)

Final Selected Features: ['vol', 'pot', 'glob', 'ane', 'sg', 'chlo', 'bun']


In [None]:
# Inspect the type of the SVM feature mask (the recorded output reports numpy.ndarray).
type(svm_selected_features)

numpy.ndarray

In [None]:
# All predictor columns, i.e. the frame without the target.
predictor_frame = df.drop(columns=['classification'])
# Per-column mapping of each distinct value to its number of occurrences.
feature_counts_X = predictor_frame.apply(lambda col: col.value_counts().to_dict())
# Boolean mask over the predictor columns: True where the column is part of
# the final selected feature list.
final_selected_features_array = predictor_frame.columns.isin(final_selected_features)

print("\nFinal Selected Features Array:", final_selected_features_array, sep="\n")


Final Selected Features Array:
[False False  True  True False False  True  True False False False  True
 False False  True False False False False False  True]


In [None]:
# Fit one SVC on the PSO-selected columns and one on every column, then
# compare their accuracy on the held-out test split.
svm_model_selected = SVC().fit(X_train[:, svm_selected_features], y_train)
svm_model_all = SVC().fit(X_train, y_train)

svm_subset_accuracy = svm_model_selected.score(X_test[:, svm_selected_features], y_test)
svm_all_accuracy = svm_model_all.score(X_test, y_test)

print('SVM - Subset accuracy:', svm_subset_accuracy)
print('SVM - All Features Accuracy:', svm_all_accuracy)

SVM - Subset accuracy: 0.98125
SVM - All Features Accuracy: 0.78125


In [None]:
# Train models with selected and all features for MLP.
# Both networks are seeded: with early_stopping=True the validation split and
# the initial weights are drawn at random, so without a fixed random_state the
# two accuracies change from run to run and cannot be compared reliably.
mlp_model_selected = MLPClassifier(max_iter=1000, early_stopping=True, random_state=1234)
mlp_model_all = MLPClassifier(max_iter=1000, early_stopping=True, random_state=1234)
mlp_model_selected.fit(X_train[:, mlp_selected_features], y_train)
mlp_model_all.fit(X_train, y_train)
print('MLP - Subset accuracy:', mlp_model_selected.score(X_test[:, mlp_selected_features], y_test))
print('MLP - All Features Accuracy:', mlp_model_all.score(X_test, y_test))


MLP - Subset accuracy: 0.94375
MLP - All Features Accuracy: 0.76875
