In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import uniform, randint
from sklearn.preprocessing import LabelEncoder

In [None]:
def initialize_positions(no_of_particles, dimensions):
    """Build the starting position matrix for the swarm.

    Every entry is ``abs(u1 - u2)`` for two successive draws from
    ``uniform(0, 1)``.

    Args:
        no_of_particles: number of rows (one per particle).
        dimensions: number of columns (one per coordinate).

    Returns:
        A float ndarray of shape ``(no_of_particles, dimensions)``.
    """
    swarm = np.zeros((no_of_particles, dimensions), dtype=float)

    # Fill one particle (row) at a time; the draws within a row run left to right.
    for row in range(no_of_particles):
        swarm[row, :] = [abs(uniform(0, 1) - uniform(0, 1)) for _ in range(dimensions)]

    return swarm


In [None]:
def initilize_velocities(no_of_particles, dimensions):
    """Build the starting velocity matrix for the swarm.

    Each cell receives the absolute difference of two successive
    ``uniform(0, 1)`` draws, so every starting velocity is non-negative.

    Args:
        no_of_particles: number of rows (one per particle).
        dimensions: number of columns (one per coordinate).

    Returns:
        A float ndarray of shape ``(no_of_particles, dimensions)``.
    """
    velocities = np.zeros((no_of_particles, dimensions), dtype=float)

    # np.ndindex visits cells in row-major order (last index varies fastest).
    for cell in np.ndindex(no_of_particles, dimensions):
        first_draw = uniform(0, 1)
        second_draw = uniform(0, 1)
        velocities[cell] = abs(first_draw - second_draw)

    return velocities

In [None]:
def to_binary(positions,threshold, no_of_particles,dimensions):
    """Threshold continuous positions into a 0/1 matrix.

    Args:
        positions: 2-D array of continuous particle positions.
        threshold: entries greater than or equal to this value become 1,
            all others become 0.
        no_of_particles: number of rows to convert.
        dimensions: number of columns to convert.

    Returns:
        A float ndarray of shape ``(no_of_particles, dimensions)`` holding
        only 0.0 and 1.0.
    """
    binary_positions = np.zeros((no_of_particles, dimensions))

    # Boolean mask of the cells that reach the threshold; everything else
    # keeps the 0 it was initialised with.
    reaches_threshold = positions[:no_of_particles, :dimensions] >= threshold
    binary_positions[reaches_threshold] = 1

    return binary_positions


In [None]:
def findFeatures(particle):
    """Return the indices at which ``particle`` holds the value 1.

    Args:
        particle: sequence of flags (list or 1-D array).

    Returns:
        A list of integer indices, in ascending order.
    """
    return [column for column, flag in enumerate(particle) if flag == 1]

In [None]:
def fitness_function(particle,xtrain,xtest,ytrain,ytest, random_state=None):
    """Score a feature mask by the accuracy of a decision tree trained on it.

    Args:
        particle: 1-D 0/1 mask; entries equal to 1 mark the selected columns.
            If no entry equals 1, one randomly chosen entry is set to 1
            *in place* so the classifier always gets at least one feature.
        xtrain: training features; columns are picked positionally via ``.iloc``.
        xtest: test features; same column layout as ``xtrain``.
        ytrain: training targets.
        ytest: test targets.
        random_state: optional seed forwarded to ``DecisionTreeClassifier``.
            The default ``None`` leaves the classifier unseeded, exactly as
            before; pass an int to make repeated evaluations of the same mask
            reproducible.

    Returns:
        ``accuracy_score`` of the tree's predictions on ``xtest`` against
        ``ytest``.
    """
    features = findFeatures(particle)

    # An empty selection cannot be fitted: switch on one random feature and
    # record the change in the caller's mask.
    if not features:
        index = randint(0, len(particle) - 1)
        particle[index] = 1
        features = [index]

    new_xtrain = xtrain.iloc[:, features]
    new_xtest = xtest.iloc[:, features]

    decision_model = DecisionTreeClassifier(criterion='gini', random_state=random_state)
    decision_model.fit(new_xtrain, ytrain)

    predictions = decision_model.predict(new_xtest)

    return accuracy_score(ytest, predictions)

In [None]:
# Main PSO driver: runs the particle swarm optimisation loop and returns the best feature subset found.

def run_PSO(w, c1, c2, no_of_particles, maximum_iterations, threshold,xtrain,xtest,ytrain,ytest):
    """Run binary PSO feature selection and return the best subset found.

    Particles move in a continuous space. On every iteration their positions
    are thresholded into 0/1 feature masks, each mask is scored with
    ``fitness_function``, the personal and global bests are updated, and then
    all velocities and positions are advanced.

    Args:
        w: inertia weight applied to the previous velocity.
        c1: acceleration factor pulling a particle towards its own best position.
        c2: acceleration factor pulling a particle towards the global best position.
        no_of_particles: swarm size.
        maximum_iterations: number of evaluate/update rounds.
        threshold: cut-off passed to ``to_binary`` to turn positions into masks.
        xtrain, xtest, ytrain, ytest: data forwarded to ``fitness_function``;
            the number of columns of ``xtrain`` is the search-space dimension.

    Returns:
        dict with keys
        ``'sf'``    - ndarray of selected column indices,
        ``'nf'``    - how many columns were selected,
        ``'score'`` - best fitness value seen.
    """
    dimensions = np.size(xtrain, 1)

    # Random starting state of the swarm.
    positions = initialize_positions(no_of_particles, dimensions)
    velocities = initilize_velocities(no_of_particles, dimensions)

    # Per-particle and global bookkeeping.
    fitness_of_particles = np.zeros([no_of_particles, 1], dtype='float')
    best_fitness_of_particles = np.full([no_of_particles, 1], float('-inf'), dtype='float')
    global_best_fitness = float('-inf')
    best_particle_position = np.zeros([no_of_particles, dimensions], dtype='float')
    global_best_positions = np.zeros([1, dimensions], dtype='float')

    # The exact 0/1 masks that produced the best scores. They are stored
    # separately because a mask can differ from the thresholded position (an
    # all-zero mask is repaired below), and the reported features must be the
    # ones that were actually scored.
    best_particle_mask = np.zeros([no_of_particles, dimensions], dtype='float')
    global_best_mask = np.zeros(dimensions, dtype='float')

    t = 0  # iteration counter
    while t < maximum_iterations:

        # Continuous positions -> 0/1 feature masks.
        position_bin = to_binary(positions, threshold, no_of_particles, dimensions)

        for i in range(no_of_particles):
            # A mask with no feature cannot be scored: switch on one random
            # feature here so the mask we store is the mask that is evaluated.
            if not position_bin[i, :].any():
                position_bin[i, randint(0, dimensions - 1)] = 1

            fitness_of_particles[i, 0] = fitness_function(position_bin[i, :],xtrain,xtest,ytrain,ytest)

            # Personal best (ties replace the previous best).
            if fitness_of_particles[i, 0] >= best_fitness_of_particles[i, 0]:
                best_particle_position[i, :] = positions[i, :]
                best_particle_mask[i, :] = position_bin[i, :]
                best_fitness_of_particles[i, 0] = fitness_of_particles[i, 0]

            # Global best (ties replace the previous best).
            if best_fitness_of_particles[i, 0] >= global_best_fitness:
                global_best_positions[0, :] = best_particle_position[i, :]
                global_best_mask[:] = best_particle_mask[i, :]
                global_best_fitness = best_fitness_of_particles[i, 0]

        t += 1

        # Advance every coordinate of every particle.
        for i in range(no_of_particles):
            for d in range(dimensions):
                r1 = uniform(0,1)
                r2 = uniform(0,1)

                # inertia + pull towards personal best + pull towards global best
                velocities[i, d] = w * velocities[i, d] + c1*r1*(
                    best_particle_position[i, d]-positions[i, d]) + c2*r2*(global_best_positions[0, d]-positions[i, d])

                positions[i, d] = positions[i, d] + velocities[i, d]

    # Report the columns of the mask that achieved the best score.
    pos = np.arange(dimensions)
    selected_indexes = pos[global_best_mask == 1]
    number_of_features_selected = len(selected_indexes)

    pso_data = {'sf': selected_indexes,
                'nf': number_of_features_selected, 'score': global_best_fitness}

    return pso_data

In [None]:
def start_pso(xtrain,xtest,ytrain,ytest, no_of_particles, maximum_iterations,threshold,total_num_ft):
    """Run ``run_PSO`` with fixed swarm coefficients and unpack its result.

    Args:
        xtrain, xtest, ytrain, ytest: train/test split forwarded to ``run_PSO``.
        no_of_particles: swarm size.
        maximum_iterations: number of PSO iterations.
        threshold: binarisation cut-off forwarded to ``run_PSO``.
        total_num_ft: accepted for call compatibility; not used here.

    Returns:
        A two-element list ``[score, selected_feature_indices]``.
    """
    inertia_weight = 0.5
    cognitive_factor = 2
    social_factor = 2

    result = run_PSO(
        inertia_weight, cognitive_factor, social_factor,
        no_of_particles, maximum_iterations, threshold,
        xtrain, xtest, ytrain, ytest,
    )

    best_score = result['score']
    chosen_columns = result['sf']
    return [best_score, chosen_columns]

In [None]:
def init_local_pso(output_df,labels, no_of_particles, maximum_iterations):
    """Split the given feature table and run one PSO pass on it.

    Args:
        output_df: feature table to search over.
        labels: targets aligned with ``output_df``.
        no_of_particles: swarm size.
        maximum_iterations: number of PSO iterations.

    Returns:
        Whatever ``start_pso`` returns: ``[score, selected_feature_indices]``.
    """
    binarisation_threshold = 0.5
    feature_count = output_df.shape[1]

    # 70/30 split with a fixed seed so every pass sees the same partition.
    split = train_test_split(output_df, labels, test_size=0.3, random_state=30)
    xtrain, xtest, ytrain, ytest = split

    return start_pso(
        xtrain, xtest, ytrain, ytest,
        no_of_particles, maximum_iterations,
        binarisation_threshold, feature_count,
    )

In [None]:
# Module-level state read and updated by local_pso:
# the best score seen so far, and a placeholder for the best feature table.
best_score_all = 0
best_selected_df = list()

In [None]:
def local_pso(best_score, output_df,labels,no_of_particles, maximum_iterations):
    """Repeatedly re-run PSO on the current feature subset until it stops helping.

    Each round records the incoming subset as the best so far (when its score
    is at least the recorded best), runs PSO on that subset, and, if the new
    score is at least the recorded best, narrows the table to the newly
    selected columns and goes again. When a round fails to match the best
    score, the best subset is printed, given a ``'labels'`` column and written
    to ``result.csv``.

    Args:
        best_score: score achieved by ``output_df``.
        output_df: feature table for the current subset.
        labels: targets aligned with ``output_df``.
        no_of_particles: swarm size for each PSO pass.
        maximum_iterations: iterations for each PSO pass.

    Returns:
        None. Results are stored in the module-level ``best_score_all`` and
        ``best_selected_df`` and written to ``result.csv``.
    """
    # Both names must be declared global; otherwise the best table would only
    # ever be bound locally and the module-level value would never change.
    global best_score_all, best_selected_df

    # A loop rather than recursion: a long run of ties would otherwise add one
    # stack frame per round.
    while True:
        if(best_score>=best_score_all):
            best_score_all = best_score
            best_selected_df = output_df

        print("best_updates: ",best_score_all)

        ans = init_local_pso(output_df, labels, no_of_particles, maximum_iterations)

        # Stop when the new pass is worse than the best, or when it selected
        # no columns at all (a zero-column table cannot be searched further).
        if not (ans[0]>=best_score_all) or len(ans[1]) == 0:
            break

        output_df = output_df.iloc[:, ans[1]].copy()
        print("local_df", output_df)
        best_score = ans[0]

    print("Best score ended: ",best_score_all)
    print("Best susbset of ft ended: ", best_selected_df)
    best_selected_df['labels'] = labels
    best_selected_df.to_csv('result.csv',index=False)
    return

In [None]:
def init_global_pso(csv_path=r'uploaded_file.csv', no_of_particles=6, maximum_iterations=10, threshold=0.5):
    """Load the dataset, run one PSO pass on all features, then refine locally.

    The last column of the CSV is used as the label and every other column as
    a feature. After the first PSO pass the selected columns are handed to
    ``local_pso`` for iterative refinement.

    Args:
        csv_path: path of the CSV file to load.
        no_of_particles: swarm size.
        maximum_iterations: number of PSO iterations per pass.
        threshold: binarisation cut-off for the first PSO pass.

    Returns:
        None.
    """
    global best_score_all, best_selected_df

    # NOTE(review): earlier, now-removed commented code dropped an 'ID' column
    # and label-encoded a 'class' column; presumably the CSV is expected to be
    # pre-processed already - confirm against the data source.
    df1 = pd.read_csv(csv_path)

    # Positional split: all but the last column are features, the last is the label.
    features = df1.iloc[:, 0:-1]
    labels = df1.iloc[:, [-1]]

    total_num_ft = features.shape[1]

    # data split for training and testing
    xtrain, xtest, ytrain, ytest = train_test_split(features, labels, test_size=0.3, random_state=30)
    returned_ans = start_pso(xtrain,xtest,ytrain,ytest, no_of_particles, maximum_iterations,threshold, total_num_ft)

    # Start every run from a clean slate so that re-running this cell does not
    # compare against the best score left over from a previous run.
    best_score_all = 0
    best_selected_df = []

    output_df = features.iloc[:, returned_ans[1]].copy()
    local_pso(returned_ans[0], output_df,labels,no_of_particles, maximum_iterations)

In [None]:
# Entry point: run the global PSO pass followed by the local refinement passes.
init_global_pso()

In [None]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.metrics import accuracy_score

# # Sample dataset (replace this with your own data)
# df = pd.read_csv('/content/final_dataset.csv')

# # Separate features (X) and target (y)
# features = df.iloc[:, 0:-1]
# labels = df.drop(df.iloc[:, 0:-1], axis=1)

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=30)

# # Initialize the DecisionTreeClassifier
# clf = DecisionTreeClassifier()

# # Train the classifier on the training data
# clf.fit(X_train, y_train)

# # Make predictions on the test data
# y_pred = clf.predict(X_test)

# # Evaluate the classifier's accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)


Accuracy: 0.9433962264150944
