In [None]:
import numpy as np

def _safe_divide(num, den):
    """Element-wise num / den that yields 0 wherever den is 0 (instead of NaN/inf)."""
    num = np.asarray(num, dtype=float)
    den = np.asarray(den, dtype=float)
    out = np.zeros_like(num)
    np.divide(num, den, out=out, where=den != 0)
    return out


def Evaluate(ACTUAL, PREDICTED):
    """Compute one-vs-rest classification metrics and return the best value of each.

    Parameters
    ----------
    ACTUAL : array-like
        Ground-truth labels. Flattened to 1-D.
    PREDICTED : array-like
        Predicted labels, same number of elements as ACTUAL. Flattened to 1-D.

    Returns
    -------
    EVAL : numpy.ndarray, shape (7,)
        Maximum over classes of, in order: accuracy, sensitivity, specificity,
        precision, recall, F-measure and G-mean (sqrt(TPR * TNR)).
        A metric whose denominator is zero for a class counts as 0 for that class.
    cf : numpy.ndarray, shape (4, n_classes)
        Rows are tp, tn, fp, fn; columns follow np.unique(ACTUAL).

    Raises
    ------
    ValueError
        If the inputs are empty or differ in size.
    """
    ACTUAL = np.asarray(ACTUAL).ravel()
    PREDICTED = np.asarray(PREDICTED).ravel()
    if ACTUAL.size == 0:
        raise ValueError("ACTUAL must contain at least one label")
    if ACTUAL.size != PREDICTED.size:
        raise ValueError("ACTUAL and PREDICTED must have the same number of elements")

    un = np.unique(ACTUAL)
    N = ACTUAL.size
    tp = np.zeros(len(un))
    tn = np.zeros(len(un))
    fp = np.zeros(len(un))
    fn = np.zeros(len(un))

    for i, cls in enumerate(un):
        is_cls = ACTUAL == cls
        pred_cls = PREDICTED == cls

        tp[i] = np.sum(is_cls & pred_cls)
        # A true negative for `cls` is any other-class sample NOT predicted as
        # `cls`, even when it was assigned to a different wrong class.
        tn[i] = np.sum(~is_cls & ~pred_cls)
        fp[i] = np.sum(~is_cls & pred_cls)
        fn[i] = np.sum(is_cls & ~pred_cls)

    sensitivity = _safe_divide(tp, tp + fn)
    specificity = _safe_divide(tn, tn + fp)
    accuracy = (tp + tn) / N
    precision = _safe_divide(tp, tp + fp)
    recall = sensitivity
    f_measure = 2 * _safe_divide(precision * recall, precision + recall)
    gmax = np.sqrt(sensitivity * specificity)

    EVAL = np.array([
        np.max(accuracy),
        np.max(sensitivity),
        np.max(specificity),
        np.max(precision),
        np.max(recall),
        np.max(f_measure),
        np.max(gmax),
    ])
    cf = np.vstack((tp, tn, fp, fn))

    return EVAL, cf


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def fitness(dat, lab, sl):
    """Score a feature subset as 1 - accuracy of a random forest (lower is better).

    Parameters
    ----------
    dat : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    lab : array-like, shape (n_samples,)
        Class labels.
    sl : array-like
        Feature mask; every non-zero entry selects the column at that position.
        A (1, n_features) row is treated the same as a flat (n_features,) mask.

    Returns
    -------
    float
        1 - accuracy, or 1.0 (the worst score) when the mask selects no feature.
    """
    # flatnonzero gives column positions for both flat and (1, D) masks;
    # np.where(sl)[0] would return row indices for the latter.
    ind = np.flatnonzero(np.asarray(sl))
    if ind.size == 0:
        # The classifier cannot be fitted on zero columns; report the worst score.
        return 1.0
    data_subset = dat[:, ind]

    md = RandomForestClassifier()
    md.fit(data_subset, lab)
    # NOTE(review): accuracy is measured on the same samples the forest was
    # fitted on, so the score is optimistic; consider a held-out or
    # cross-validated estimate.
    LL = md.predict(data_subset)
    accuracy = accuracy_score(lab, LL)
    ft = 1 - accuracy

    return ft


In [None]:
import numpy as np

def levy(n, m, beta):
    """Draw an (n, m) array of Levy-flight steps as u / |v| ** (1 / beta).

    Parameters
    ----------
    n, m : int
        Shape of the returned array.
    beta : float
        Exponent used both in the scale of `u` and in the 1 / beta power of |v|.

    Returns
    -------
    numpy.ndarray, shape (n, m)
        Step sizes, where u ~ Normal(0, sigma_u) and v ~ Normal(0, 1) are drawn
        from NumPy's global random state.
    """
    # Local import: `np.math` was an alias of the stdlib module and no longer
    # exists in NumPy 2.0.
    import math

    num = math.gamma(1 + beta) * math.sin(math.pi * beta / 2)
    den = math.gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma_u = (num / den) ** (1 / beta)

    # np.random.normal expects the standard deviation, so pass sigma_u itself
    # rather than the variance sigma_u ** 2.
    u = np.random.normal(0, sigma_u, (n, m))
    v = np.random.normal(0, 1, (n, m))
    z = u / (np.abs(v) ** (1 / beta))

    return z


In [None]:
import os
import pandas as pd
from sklearn.impute import KNNImputer

# Location of the input CSV; its last column is used as the class label below.
file_path = "/content/diabetes.csv"

# Directory and file-name components of that path.
file_dir, file_name = os.path.split(file_path)

# Load the table, then separate the feature columns from the label column.
X = pd.read_csv(file_path)
data2, class_label = X.iloc[:, :-1].values, X.iloc[:, -1].values

# Impute missing feature values with a 5-neighbour KNN imputer.
imputer = KNNImputer(n_neighbors=5)
data1 = imputer.fit_transform(data2)


In [None]:

# Population-based feature search: 10 candidate masks, one column per feature
# of data1. On a normal run it leaves the chosen mask in `bestfeat`, shape (1, D).
FSL, FSU = 0, 1
D = data1.shape[1]
FS = np.zeros((10, D))
fit = np.zeros(10)


def _safe_fitness(mask):
    """Score `mask` with fitness(); fall back to the worst score (1.0) on failure."""
    try:
        return fitness(data1, class_label, mask)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
        # still propagate. A mask that selects no feature ends up here.
        return 1.0


# Random initial population in [FSL, FSU) and its fitness.
for i in range(10):
    FS[i, :] = FSL + np.random.uniform(0, 1, D) * (FSU - FSL)
    fit[i] = _safe_fitness(FS[i, :])

# Rows tied for the best (lowest) initial fitness.
ind = np.where(fit == np.min(fit))[0]
FSnew = FS[ind, :]
pdp = 0.1
# NOTE(review): presumably the gliding constants of the Squirrel Search
# Algorithm — confirm against the reference implementation.
row, V, S, cd, CL, hg, sf = 1.204, 5.25, 0.0154, 0.6, 0.7, 1, 18
Gc = 1.9
D1 = 1 / (2 * row * V ** 2 * S * cd)
L = 1 / (2 * row * V ** 2 * S * CL)
tanpi = D1 / L
dg = hg / (tanpi * sf)
# Pick one of the best rows uniformly. randint's upper bound is exclusive, so
# the range must start at 0: (1, len(ind)) raised when only one row was best
# and could never select row 0.
aa = np.random.randint(0, len(ind))
iter, maxiter = 1, 4

while iter < maxiter:
    for i in range(10):
        if np.random.rand() >= pdp:
            # FSnew has len(ind) rows, which may be fewer than 10; wrap the
            # index instead of running past the end.
            target = FSnew[i % len(FSnew), :]
            FS[i, :] = np.round(FS[i, :] + (dg * Gc * np.abs(target - FS[i, :])))
        else:
            FS[i, :] = FSL + np.random.uniform(0, 1, D) * (FSU - FSL)
        # NOTE(review): Fh (and Fa below) are references to FS, not snapshots,
        # so they always show the current population.
        Fh = FS
        fit1 = _safe_fitness(FS[i, :])
        # fit1 is a scalar: atleast_1d keeps np.where valid on NumPy >= 2.1,
        # where nonzero on 0-d input raises. The result is always array([0]).
        ind1 = np.where(np.atleast_1d(fit1) == np.min(fit1))[0]
        FSnew1 = FS[ind1, :]

        if np.random.rand() > pdp:
            FS[i, :] = np.round(FS[i, :] + (dg * Gc * np.abs(FSnew[aa, :] - FS[i, :])))
        else:
            FS[i, :] = FSL + np.random.uniform(0, 1, D) * (FSU - FSL)
        Fa = FS
        fit2 = _safe_fitness(FS[i, :])
        ind2 = np.where(np.atleast_1d(fit2) == np.min(fit2))[0]
        FSnew2 = FS[ind2, :]

    # NOTE(review): because Fh and Fa are the same array, Fh - Fa is all zeros
    # and Sc evaluates to 0, so the 'summer' branch is taken. Also confirm
    # whether `10 * np.exp(-6)` was meant to be the literal 10e-6, and whether
    # the square belongs inside the sum.
    Sc = np.sqrt(np.sum(np.abs(Fh - Fa)) ** 2)
    Smin = (10 * np.exp(-6)) / (365) ** (iter / (maxiter / 2.5))

    if Sc < Smin:
        season = 'summer'
        # Relocate every row with a Levy step.
        for i in range(10):
            FS[i, :] = FSL + levy(1, D, 1.5) * (FSU - FSL)
    else:
        season = 'winter'
        # NOTE(review): leaving here on the first pass means `bestfeat` is
        # never assigned.
        break

    fit3 = _safe_fitness(FS[i, :])
    ind3 = np.where(np.atleast_1d(fit3) == np.min(fit3))[0]
    # Stack the candidates as rows of a (3, D) array so each one is passed to
    # fitness as a flat mask.
    final = np.vstack((Fh[ind1, :], Fa[ind2, :], FS[ind3, :]))
    final = np.abs(final.round())
    # Keep every candidate's score; overwriting a single scalar made the
    # argmin below always 0.
    fitt = np.array([_safe_fitness(final[k, :]) for k in range(final.shape[0])])

    best = np.min(fitt)
    inn = np.argmin(fitt)
    # Keep a leading axis of length 1 so `bestfeat[-1]` yields the flat mask.
    bestfeat = final[inn:inn + 1, :]
    pdp = best

    iter += 1


In [None]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import random


# Column positions of the selected features. atleast_2d lets `bestfeat` be
# either a flat mask or a (1, D) row.
sel = np.where(np.atleast_2d(bestfeat)[-1])[0]
if sel.size == 0:
    raise ValueError("Feature selection returned an empty subset; nothing to train on.")
print('Selected Features')
print(sel)

# Index the imputed matrix: the mask was built against data1's columns, and
# the raw data2 may still contain missing values.
dataA = data1[:, sel]

p = 0.75  # Proportion of rows to select for training
N = dataA.shape[0]  # Total number of rows
tf = np.full(N, False)  # Boolean row mask: True = training row
tf[:round(p*N)] = True
np.random.shuffle(tf)  # Randomize which rows are marked for training

dataTraining = dataA[tf, :]
labeltraining = class_label[tf]

dataTesting = dataA[~tf, :]
labeltesting = class_label[~tf]

print('Training feature size:', len(dataTraining))
print('Testing feature size:', len(dataTesting))

# Support Vector Machine (SVM)
svt = SVC()
svt.fit(dataTraining, labeltraining)
out1 = svt.predict(dataTesting)

# K-Nearest Neighbors (KNN)
mdl = KNeighborsClassifier()
mdl.fit(dataTraining, labeltraining)
out2 = mdl.predict(dataTesting)

# Random Forest (RF) — note that `mdl` is rebound here, so the KNN model is
# no longer reachable after this point.
mdl = RandomForestClassifier()
mdl.fit(dataTraining, labeltraining)
out3 = mdl.predict(dataTesting)

# Evaluation
def evaluate(y_true, y_pred):
    """Summarise predictions as (accuracy, precision, recall, f1_score).

    Precision, recall and F1 are the 'macro avg' entries of scikit-learn's
    classification_report; accuracy is its 'accuracy' entry.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with y_true.

    Returns
    -------
    tuple of float
        (accuracy, macro precision, macro recall, macro F1).
    """
    # The confusion matrix that used to be computed here was never used.
    report = classification_report(y_true, y_pred, output_dict=True)
    macro = report['macro avg']
    return report['accuracy'], macro['precision'], macro['recall'], macro['f1-score']

# Score each classifier's test-set predictions (SVM, KNN, random forest).
eval1, eval2, eval3 = (evaluate(labeltesting, predicted) for predicted in (out1, out2, out3))

# Print the four metrics of every classifier under its own heading.
metric_labels = ("Accuracy:", "Precision:", "Recall:", "F1-Score:")
for heading, scores in (
    ("SVM Performance:", eval1),
    ("KNN Performance:", eval2),
    ("Random Forest Performance:", eval3),
):
    print(heading)
    for label, value in zip(metric_labels, scores):
        print(label, value)

Selected Features
[0 1 2 6]
Training feature size: 576
Testing feature size: 192
SVM Performance:
Accuracy: 0.734375
Precision: 0.75557461406518
Recall: 0.6570257611241218
F1-Score: 0.6616798535051653
KNN Performance:
Accuracy: 0.7135416666666666
Precision: 0.6996254097081317
Recall: 0.6497658079625293
F1-Score: 0.6551611533814452
Random Forest Performance:
Accuracy: 0.71875
Precision: 0.7118421052631578
Recall: 0.6508196721311476
F1-Score: 0.656005308560053
