In [1]:
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import operator
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [2]:
import pandas as pd
from sklearn.metrics import r2_score
from jostar.algorithms import ACO
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Define parameters
num_ants = 4
num_iterations = 3
q0 = 0.5
alpha = 1.0
beta = 0.7
rho = 0.45

In [4]:
data = pd.read_csv("pavdatfinal.csv")

In [5]:
data.head()

Unnamed: 0,CC,CCL,CCO,CI,CLC,CLLC,LDC,LLDC,LCOM5,NL,...,TNLM,TNLPA,TNLPM,TNLS,TNM,TNOS,TNPA,TNPM,TNS,bug
0,0.0,0,0,0,0.0,0.0,0,0,6,1,...,13,0,13,0,13,49,0,13,0,0
1,0.0,0,0,0,0.0,0.0,0,0,0,0,...,5,0,5,0,5,8,0,5,0,0
2,0.0,0,0,0,0.0,0.0,0,0,1,4,...,4,0,3,2,4,33,0,3,2,0
3,0.0,0,0,0,0.0,0.0,0,0,2,1,...,15,2,15,0,15,21,5,15,0,0
4,0.0,0,0,0,0.0,0.0,0,0,0,0,...,2,0,2,0,2,0,0,2,0,0


In [6]:
X = data.drop(['bug'], axis = 1)
y = data['bug']
X.head()

Unnamed: 0,CC,CCL,CCO,CI,CLC,CLLC,LDC,LLDC,LCOM5,NL,...,TNLG,TNLM,TNLPA,TNLPM,TNLS,TNM,TNOS,TNPA,TNPM,TNS
0,0.0,0,0,0,0.0,0.0,0,0,6,1,...,2,13,0,13,0,13,49,0,13,0
1,0.0,0,0,0,0.0,0.0,0,0,0,0,...,4,5,0,5,0,5,8,0,5,0
2,0.0,0,0,0,0.0,0.0,0,0,1,4,...,0,4,0,3,2,4,33,0,3,2
3,0.0,0,0,0,0.0,0.0,0,0,2,1,...,10,15,2,15,0,15,21,5,15,0
4,0.0,0,0,0,0.0,0.0,0,0,0,0,...,0,2,0,2,0,2,0,0,2,0


In [7]:
X.shape,y.shape

((77676, 60), (77676,))

In [8]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
# X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [9]:
pheromone = np.random.rand(X.shape[1])
print(pheromone.shape)

(60,)


In [10]:
# Define fitness function
def fitness(X, y, pheromone):
    ran = np.random.rand(X.shape[1])
    # print(pheromone)
    # print(ran)
    selected_features = np.where(ran < pheromone)[0]
    # print(selected_features)
    if len(selected_features) == 0:
        return 0.0
#     clf = SVC()

#     clf =  DecisionTreeClassifier()

    clf =  RandomForestClassifier(n_estimators=10)
    clf.fit(X.iloc[:, selected_features], y)
    y_pred = clf.predict(X_test.iloc[:, selected_features])
    # print(y_test)
    # print(y_pred)
    return accuracy_score(y_test, y_pred)
# print(fitness(X_train,y_train,pheromone))

In [11]:
# Initialize best solution and its fitness
best_solution = None
best_fitness = 0.0

# Run the ACO algorithm
for iteration in range(num_iterations):
    # Initialize ant solutions
    print("Iteration: ", iteration)
    solutions = np.zeros((num_ants, X_train.shape[1]))
    # print(iteration," : ",solutions,"\n")
    for ant in range(num_ants):
        for feature in range(X_train.shape[1]):
            r = np.random.rand()
            if r < q0:
                solutions[ant, feature] = 1
                # print("In if:", solutions,"\n")
            else:
                probabilities = pheromone ** alpha * (1.0 / fitness(X_train, y_train, pheromone)) ** beta
                probabilities /= np.sum(probabilities)
                # print("probabilities",probabilities)
                pro = probabilities[feature]
                # print("Probabilites sum: ", np.sum(probabilities))
                # solutions[ant, feature] = np.random.choice([0, 1], p=probabilities)
                solutions[ant, feature] = np.random.choice([0, 1], p=[1-pro,pro])
                # print("In else:", solutions,"\n")
    
    # Update pheromone levels
    print("Updating Pheromone:")
    for feature in range(X_train.shape[1]):
        selected_ants = np.where(solutions[:, feature] == 1)[0]
        pheromone[feature] = (1.0 - rho) * pheromone[feature] + rho * np.sum([1.0 / fitness(X_train, y_train, pheromone) for ant in selected_ants])
    
    for ant in range(num_ants):
        ant_fitness = fitness(X_train, y_train, solutions[ant])
        if ant_fitness > best_fitness:
            best_solution = solutions[ant]
            best_fitness = ant_fitness
    
    # Print results
    print("Iteration {}: Best Fitness = {:.4f}".format(iteration+1, best_fitness))
    print(best_solution)

Iteration:  0
Updating Pheromone:
Iteration 1: Best Fitness = 0.9248
[0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.
 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1.]
Iteration:  1
Updating Pheromone:
Iteration 2: Best Fitness = 0.9248
[0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.
 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1.]
Iteration:  2
Updating Pheromone:
Iteration 3: Best Fitness = 0.9248
[0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.
 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1.]
