In [83]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [84]:
def load_dataset(path="unknown.data"):
    """Load a comma-separated numeric dataset and split it into features and labels.

    Parameters
    ----------
    path : str or path-like, optional
        File to read. Defaults to "unknown.data" so existing calls without
        arguments keep working.

    Returns
    -------
    X : numpy.ndarray
        Every column except the first, one row per sample.
    y : numpy.ndarray
        The first column of the file, used as the target class.
    """
    # ndmin=2 keeps the array two-dimensional even when the file holds a
    # single row; otherwise the column slicing below raises IndexError.
    data = np.loadtxt(path, delimiter=',', ndmin=2)

    # Split into labels (first column) and features (all remaining columns)
    y = data[:, 0]
    X = data[:, 1:]

    return X, y

# Read the dataset, then standardize the feature columns.
# NOTE(review): the scaler is fitted on every row here, before
# evaluate_accuracy performs its train/test split.
X, y = load_dataset()
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [85]:
def evaluate_accuracy(feature_subset, X, y, test_size=0.4, n_neighbors=5, random_state=42):
    """Score a feature subset with a k-nearest-neighbours classifier on a hold-out split.

    Parameters
    ----------
    feature_subset : array-like
        One entry per column of ``X``; truthy entries mark the columns to keep.
        Lists and tuples are accepted as well as NumPy arrays.
    X : numpy.ndarray
        Two-dimensional feature matrix (samples in rows, features in columns).
    y : array-like
        Target label for each row of ``X``.
    test_size : float, optional
        Passed to ``train_test_split`` as ``test_size`` (default 0.4).
    n_neighbors : int, optional
        Number of neighbours used by the classifier (default 5).
    random_state : int, optional
        Seed for the split so every subset is scored on the same partition
        (default 42).

    Returns
    -------
    float
        The classifier's score on the held-out part, or 0.0 when no feature
        is selected.
    """
    # np.asarray accepts plain Python sequences, unlike calling .astype on them.
    mask = np.asarray(feature_subset, dtype=bool)

    # An empty selection cannot be trained on; treat it as the worst score.
    if not mask.any():
        return 0.0

    X_subset = X[:, mask]
    X_train, X_test, y_train, y_test = train_test_split(
        X_subset, y, test_size=test_size, random_state=random_state
    )

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train)

    return knn.score(X_test, y_test)

def fitness_function(chromosome):
    """Return the fitness of a chromosome as a percentage rounded to two decimals.

    The base score is ``evaluate_accuracy`` of the selected features on the
    module-level ``X`` and ``y``. When more than five features are selected,
    the squared fraction of selected genes is subtracted as a penalty.
    """
    selected_count = np.sum(chromosome)
    score = evaluate_accuracy(chromosome, X, y)

    # Small subsets are not penalized.
    if selected_count <= 5:
        return round(score * 100, 2)

    selected_fraction = selected_count / chromosome.shape[0]
    return round((score - selected_fraction ** 2) * 100, 2)
    

In [86]:
# Sanity check: score one randomly generated 13-gene chromosome
chrom = np.zeros(shape=13, dtype=int)
chrom_rand = np.random.randint(low=0, high=2, size=13)
print(f"Chromosome --> {chrom_rand} \n   fitness --> {fitness_function(chrom_rand)}")

Chromosome --> [1 1 0 0 0 0 0 1 0 0 0 0 0] 
   fitness --> 77.78


In [87]:
# Genetic-algorithm settings, read as module-level globals by the GA functions.
population_size = 50  # number of chromosomes in each generation
generations = 100  # how many generations genetic_algorithm() iterates
mutation_rate = 0.1  # per-gene probability that mutate() flips a bit
chromosome_length = 13  # genes per chromosome; presumably the number of feature columns in X — confirm

def initialize_population():
    """Create the starting population as a random 0/1 matrix.

    The result has ``population_size`` rows and ``chromosome_length`` columns,
    both taken from the module-level settings.
    """
    shape = (population_size, chromosome_length)
    return np.random.randint(low=0, high=2, size=shape)

def selection(population, fitnesses):
    """Pick two parents by roulette-wheel (fitness-proportionate) selection.

    Parameters
    ----------
    population : numpy.ndarray
        Two-dimensional array with one chromosome per row.
    fitnesses : array-like
        One fitness value per row of ``population``.

    Returns
    -------
    tuple
        Two rows of ``population`` (the same row may be drawn twice).

    Notes
    -----
    Fitness values may be zero or negative (the feature-count penalty can
    exceed the accuracy). Negative values are shifted so the smallest weight
    becomes zero, and if every weight ends up zero the parents are drawn
    uniformly. For strictly valid inputs (non-negative, positive sum) the
    probabilities are the plain ``fitness / sum(fitness)``.
    """
    weights = np.asarray(fitnesses, dtype=float)

    # np.random.choice rejects negative probabilities, so lift the wheel.
    lowest = weights.min()
    if lowest < 0:
        weights = weights - lowest

    # A zero (or NaN) total would give NaN probabilities; p=None means uniform.
    total = weights.sum()
    probs = weights / total if total > 0 else None

    picks = np.random.choice(len(population), size=2, p=probs)
    selected = population[picks]
    return selected[0], selected[1]

def crossover(parent1, parent2):
    """Single-point crossover of two equally long chromosomes.

    A cut position is drawn uniformly from ``1 .. len(parent1) - 1`` so that
    each child receives at least one gene from each parent. The length is
    taken from the parents themselves rather than from a global setting.

    Parameters
    ----------
    parent1, parent2 : array-like
        One-dimensional chromosomes of the same length.

    Returns
    -------
    tuple of numpy.ndarray
        ``(child1, child2)``: child1 is the head of parent1 joined to the tail
        of parent2, child2 the reverse. Parents shorter than two genes cannot
        be cut and are returned as copies.

    Raises
    ------
    ValueError
        If the parents differ in length.
    """
    length = len(parent1)
    if length != len(parent2):
        raise ValueError("crossover requires parents of equal length")

    # No interior cut point exists for chromosomes with fewer than two genes.
    if length < 2:
        return np.array(parent1), np.array(parent2)

    # randint's upper bound is exclusive, so `length` makes length - 1 reachable.
    point = np.random.randint(1, length)
    child1 = np.concatenate([parent1[:point], parent2[point:]])
    child2 = np.concatenate([parent2[:point], parent1[point:]])
    return child1, child2

def mutate(chromosome, rate=None):
    """Flip each gene of a binary chromosome independently, in place.

    Parameters
    ----------
    chromosome : mutable sequence of 0/1 values
        Modified in place; every gene is visited regardless of the
        module-level ``chromosome_length``.
    rate : float, optional
        Probability of flipping each gene. When omitted, the module-level
        ``mutation_rate`` is used.

    Returns
    -------
    The same ``chromosome`` object that was passed in.
    """
    if rate is None:
        rate = mutation_rate

    # Iterate over the chromosome's own length so shorter or longer
    # chromosomes are handled without IndexError or an unmutated tail.
    for gene_index in range(len(chromosome)):
        if np.random.rand() < rate:
            chromosome[gene_index] = 1 - chromosome[gene_index]
    return chromosome

In [88]:
def genetic_algorithm():
    """Evolve feature-selection chromosomes and return the best one seen.

    Each generation scores the whole population with ``fitness_function``,
    prints that generation's best individual, and then breeds a completely
    new population from parents chosen by ``selection``, recombined by
    ``crossover`` and altered by ``mutate``. No individual is carried over
    unchanged, so the per-generation best can drop.

    Returns
    -------
    tuple
        ``(best_chromosome_ever, best_fitness_ever)`` over all generations.
    """
    population = initialize_population()
    best_fitness_ever = -np.inf
    best_chromosome_ever = None

    for generation in range(generations):
        fitnesses = np.array(list(map(fitness_function, population)))

        # Locate this generation's top scorer.
        current_best_index = np.argmax(fitnesses)
        current_best_chromosome = population[current_best_index]
        current_best_fitness = fitnesses[current_best_index]

        # Keep a copy if it beats everything seen so far.
        if current_best_fitness > best_fitness_ever:
            best_chromosome_ever = current_best_chromosome.copy()
            best_fitness_ever = current_best_fitness

        print(f"Generation {generation+1}: Best Fitness = {current_best_fitness}, Chromosome = {current_best_chromosome}")

        # Breed the next generation two children at a time.
        offspring = []
        for _ in range(population_size // 2):
            children = crossover(*selection(population, fitnesses))
            offspring.extend(mutate(child) for child in children)

        population = np.array(offspring)

    return best_chromosome_ever, best_fitness_ever

In [89]:
# Run the full search (prints one line per generation) and report the best
# chromosome and fitness seen across all generations.
best_chromosome, best_fitness = genetic_algorithm()
print(f"\n Best Chromosome: {best_chromosome}, Fitness: {best_fitness}")

Generation 1: Best Fitness = 97.22, Chromosome = [1 1 0 0 0 0 1 0 0 0 0 0 1]
Generation 2: Best Fitness = 98.61, Chromosome = [1 0 0 0 0 0 1 1 0 1 0 1 0]
Generation 3: Best Fitness = 94.44, Chromosome = [1 1 0 0 0 0 1 0 0 0 0 0 0]
Generation 4: Best Fitness = 97.22, Chromosome = [1 1 0 0 0 0 0 0 0 1 1 0 1]
Generation 5: Best Fitness = 95.83, Chromosome = [0 0 0 0 0 0 0 0 0 1 1 0 1]
Generation 6: Best Fitness = 97.22, Chromosome = [1 1 0 1 0 0 0 0 0 1 0 1 0]
Generation 7: Best Fitness = 95.83, Chromosome = [1 1 0 0 0 0 1 1 0 1 0 0 0]
Generation 8: Best Fitness = 97.22, Chromosome = [1 0 0 0 1 0 0 0 0 1 1 1 0]
Generation 9: Best Fitness = 97.22, Chromosome = [1 1 0 0 0 0 0 0 0 1 1 1 0]
Generation 10: Best Fitness = 97.22, Chromosome = [1 1 0 0 0 0 0 0 0 1 1 1 0]
Generation 11: Best Fitness = 95.83, Chromosome = [0 0 0 0 0 0 0 0 0 1 1 0 1]
Generation 12: Best Fitness = 98.61, Chromosome = [1 0 0 0 0 0 1 0 0 1 0 1 0]
Generation 13: Best Fitness = 100.0, Chromosome = [1 0 1 0 0 0 1 0 0 1 0 