In [133]:
import pandas as pd
import numpy as np
from random import choice, random
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [102]:
def preprocessor(X):
    # Preprocessing
    
    categorical_columns = [
        'job', 'marital', 'education', 'default', 
        'housing', 'loan', 'contact', 'month', 'poutcome'
    ]
    for col in categorical_columns:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col].astype('str'))

    return X 

In [103]:
class Individual:
    def __init__(self, chromosome):
        self.chromosome = chromosome
        self.fitness = None

    def __str__(self):
        return self.chromosome

    def evaluate(self, X, y):
        if self.chromosome.count('1') == 0:
            return 0

        knn = KNeighborsClassifier(n_neighbors=3)
        columns = X.columns

        temp_columns = []
        for index, char in enumerate(self.chromosome):
            if char == '1':
                temp_columns.append(columns[index])

        temp_X = X[temp_columns]
        train_X, test_X, train_y, test_y = train_test_split(temp_X, y, test_size=0.2, random_state=0) 
        knn.fit(train_X, train_y)
        self.fitness = knn.score(test_X, test_y)
        print(self.fitness)
    
    def mutate(self, offset):
        if self.chromosome[offset] == '1':
            self.chromosome = self.chromosome[:offset] + '0' + self.chromosome[offset+1:]
        else:
            self.chromosome = self.chromosome[:offset] + '1' + self.chromosome[offset+1:]





In [104]:
class GeneticAlgorithm:
    def __init__(self, population_size, chromosome_length, crossover_rate, mutation_rate, X, y):
        self.p = population_size
        self.l = chromosome_length
        self.c = crossover_rate
        self.m = mutation_rate
        self.X = X
        self.y = y

    def initiate_population(self):
        population = []
        for _ in range(self.p):
            chromosome = []
            for _ in range(self.l):
                chromosome.append(choice(['0', '1']))
            population.append(Individual(''.join(chromosome)))
        for index, chromosome in enumerate(population):
            print(index, chromosome)
        return population

    def evaluate_population(self, population):
        for chromosome in population:
            chromosome.evaluate(self.X, self.y)
    
    def total_fitness(self, population):
        total = 0
        for chromosome in population:
            total += chromosome.fitness
        return total

    def average_fitness(self, population):
        return self.total_fitness(population)/self.p

    def best_fitness(self, population):
        maxf = population[0].fitness
        for chromosome in population:
            if maxf < chromosome.fitness:
                maxf = chromosome.fitness
        return maxf
    
    def best_individual(self, population):
        maxf = population[0].fitness
        individual = population[0]
        for chromosome in population:
            if maxf < chromosome.fitness:
                maxf = chromosome.fitness
                individual = chromosome
        return individual

    def print_fitness(self, population):
        print('''
        Total Fitness: {}
        Average Fitness: {}
        Best Fitness: {}
        Best Individual: {}
        '''.format(
            self.total_fitness(population),
            self.average_fitness(population),
            self.best_fitness(population),
            self.best_individual(population)
        ))
    
        
    def tournament(self, population):
        temp = []
        size = len(population)
        for _ in range(size):
            parent1 = population[choice(range(size))]
            parent2 = population[choice(range(size))]

            if parent1.fitness > parent2.fitness:
                temp.append(parent1)
            else:
                temp.append(parent2)
        return temp

    def crossover(self, population):
        for i in range(0, self.p - 1, 2):
            offspring1 = []
            offspring2 = []
            xpoint = 1 + choice(range(self.l - 1))
            if random() < self.c:
                for j in range(xpoint):
                    offspring1.append(population[i].chromosome[j])
                    offspring2.append(population[i+1].chromosome[j])

                for j in range(xpoint, self.l):
                    offspring1.append(population[i+1].chromosome[j])
                    offspring2.append(population[i].chromosome[j])

            if len(offspring1) > 0 and len(offspring2) > 0:
                population[i] = Individual(''.join(offspring1))
                population[i].evaluate(self.X, self.y)
                population[i+1] = Individual(''.join(offspring2))
                population[i+1].evaluate(self.X, self.y)
        return population

    def mutate_population(self, population):
        for i in range(self.p):
            for j in range(self.l):
                if(random() < self.m):
                    population[i].mutate(j)
        return population

    def elitism(self, population):
        worst = population[0]
        worst_offset = 0
        for i in range(self.p):
            if population[i].fitness <= worst.fitness:
                worst = population[i]
                worst_offset = i
        
        best = population[0]
        best_offset = 0
        for i in range(self.p):
            if population[i].fitness >= best.fitness:
                best = population[i]
                best_offset = i
        
        population[worst_offset] = population[best_offset]
        return population

    def return_fitness(self, population):
        return {
            'Total Fitness': self.total_fitness(population),
            'Average Fitness': self.average_fitness(population),
            'Best Fitness': self.best_fitness(population),
            
        }



In [105]:
def runner(P, C, M, G):
    df = pd.read_csv('bank.csv')
   
    X, y = df.drop('y', axis=1), df['y']

    X = preprocessor(X)

    population_size = int(P)
    chromosome_length = len(X.columns)
    crossover_rate = float(C)
    mutation_rate = float(M)
    generations = int(G)

    print(type(population_size))

    ga = GeneticAlgorithm(
        population_size=population_size,
        chromosome_length=chromosome_length,
        crossover_rate=crossover_rate,
        mutation_rate=mutation_rate,
        X=X,
        y=y
    )

    fitness_data = {}
    

    population = ga.initiate_population()
    ga.evaluate_population(population)
    ga.print_fitness(population)

    fitness_data[0] = ga.return_fitness(population)
    

    for generation in range(1, generations+1):
        
        print('Generation', generation)
        population = ga.tournament(population)
        ga.evaluate_population(population)

        population = ga.crossover(population)
        ga.evaluate_population(population)

        population = ga.mutate_population(population)
        ga.evaluate_population(population)

        population = ga.elitism(population)
        ga.evaluate_population(population)
        ga.print_fitness(population)
        
        
        fitness_data[generation] = ga.return_fitness(population)
        
    
    return fitness_data

if __name__ == '__main__':
    fitness_data = runner(10, 0.95, 0.02, 5)
    print(fitness_data)
    
    
    

<class 'int'>
0 1100101011100110
1 0010010001001100
2 1111010001000000
3 0110100011011111
4 0000011100111010
5 1011110111011010
6 1100010011101000
7 1010011100000100
8 0100100001010011
9 1100111000001001
0.8696132596685083
0.8552486187845304
0.8519337016574585
0.8651933701657458
0.8751381215469614
0.8674033149171271
0.8486187845303867
0.8497237569060774
0.856353591160221
0.8430939226519337

        Total Fitness: 8.58232044198895
        Average Fitness: 0.858232044198895
        Best Fitness: 0.8751381215469614
        Best Individual: 0000011100111010
        
Generation 1
0.856353591160221
0.8651933701657458
0.8674033149171271
0.8497237569060774
0.8552486187845304
0.8696132596685083
0.8751381215469614
0.856353591160221
0.8751381215469614
0.8651933701657458
0.856353591160221
0.8651933701657458
0.8497237569060774
0.8674033149171271
0.8530386740331491
0.8574585635359117
0.8751381215469614
0.8541436464088398
0.8651933701657458
0.8585635359116022
0.856353591160221
0.8651933701657458
0.84

In [106]:
input_individual="0101010110110001"
L=list(input_individual)
print(L)
res=['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y']

['0', '1', '0', '1', '0', '1', '0', '1', '1', '0', '1', '1', '0', '0', '0', '1']


In [107]:
R=['0']
N=[]
for i in range(len(L)):
    if L[i] in R:
        N.append(i)
print(N)

[0, 2, 4, 6, 9, 12, 13, 14]


In [108]:
final_col=[]
for i in N:
    final_col.append(res[i])
print(final_col)
    

['age', 'marital', 'default', 'housing', 'day', 'campaign', 'pdays', 'previous']


In [140]:
data=pd.read_csv('bank.csv')
data.columns


Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y'],
      dtype='object')

In [141]:
data.drop(columns=final_col,axis='columns',inplace=True)

In [143]:
le = LabelEncoder()
data = data.apply(le.fit_transform)

#print(data)

In [147]:
x = data.loc[:, data.columns!= 'y']
y = data.loc[:, data.columns == 'y']
x.head()


Unnamed: 0,job,education,balance,loan,contact,month,duration,poutcome
0,10,0,1475,0,0,10,75,3
1,7,1,2030,1,0,8,216,0
2,4,2,1303,0,0,0,181,0
3,4,2,1352,1,2,6,195,3
4,1,1,274,0,2,8,222,3


In [145]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state = 0)


In [151]:
from sklearn import preprocessing, neighbors
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
clf = neighbors.KNeighborsClassifier()

clf.fit(x_train, y_train)
array_test=np.array(x_test)

knnpre = clf.predict(x_test)
'''for i in range(len(knnpre)):
    if(knnpre[i]==1):
        print(knnpre[i],array_test[i])
        inverse_data=le.inverse_transform(array_test[i])
        print("inverse_data",inverse_data)'''
    

##########Results

print(confusion_matrix(y_test,knnpre))
print(classification_report(y_test,knnpre))
KKNA = accuracy_score(y_test, knnpre)
print("The Accuracy for KNN is {}".format(KKNA))

[[761  32]
 [ 91  21]]
              precision    recall  f1-score   support

           0       0.89      0.96      0.93       793
           1       0.40      0.19      0.25       112

    accuracy                           0.86       905
   macro avg       0.64      0.57      0.59       905
weighted avg       0.83      0.86      0.84       905

The Accuracy for KNN is 0.8640883977900552


  clf.fit(x_train, y_train)


In [159]:
row=pd.DataFrame(['student',0,638,0,2,8,421,3])
print(row.shape)

user_input=row.transpose()
user_input.shape
user_array=np.array(row)
user_input=le.transform(user_array)

knnpre = clf.predict(user_input)

(8, 1)


  return f(**kwargs)


ValueError: y contains previously unseen labels: 'student'