In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
def load_dataset(path="unknown.data", delimiter=','):
    """Read a delimited numeric table and split it into features and labels.

    Parameters
    ----------
    path : str or path-like, default "unknown.data"
        File passed to ``np.loadtxt``.
    delimiter : str, default ','
        Column separator used in the file.

    Returns
    -------
    X : numpy.ndarray
        Every column except the first (the features), one row per sample.
    y : numpy.ndarray
        The first column (the target class), one entry per sample.
    """
    # ndmin=2 keeps a one-row file two-dimensional, so the column slicing
    # below works instead of failing on a 1-D array.
    data = np.loadtxt(path, delimiter=delimiter, ndmin=2)

    y = data[:, 0]    # first column is the target class
    X = data[:, 1:]   # all remaining columns are features
    return X, y

# Read the raw table, then standardise each feature column (zero mean, unit variance).
# NOTE(review): the scaler is fit on the full X here, before evaluate_accuracy
# splits into train/test, so test rows contribute to the scaling statistics.
X, y = load_dataset()
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [3]:
def evaluate_accuracy(feature_subset, X, y, test_size=0.4, n_neighbors=5, random_state=42):
    """Score a k-NN classifier trained on only the selected feature columns.

    Parameters
    ----------
    feature_subset : array-like of 0/1 or bool
        One entry per column of ``X``; non-zero entries mark columns to keep.
    X : numpy.ndarray
        Feature matrix, indexed as ``X[:, mask]``.
    y : array-like
        Labels, one per row of ``X``.
    test_size : float, default 0.4
        Forwarded to ``train_test_split``.
    n_neighbors : int, default 5
        Forwarded to ``KNeighborsClassifier``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is the same on every call.

    Returns
    -------
    float
        ``knn.score`` on the held-out split, or ``0.0`` when no feature is
        selected.
    """
    # np.asarray lets plain lists/tuples through; ndarray input behaves as before.
    mask = np.asarray(feature_subset).astype(bool)

    # With no selected columns there is nothing to train on.
    if not mask.any():
        return 0.0

    X_train, X_test, y_train, y_test = train_test_split(
        X[:, mask], y, test_size=test_size, random_state=random_state
    )

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train)
    return knn.score(X_test, y_test)

def fitness_function(chromosome, max_features=5):
    """Fitness of a feature-selection chromosome, as a percentage.

    The base score is ``evaluate_accuracy`` on the module-level ``X`` and
    ``y``. When more than ``max_features`` genes are set, the squared
    fraction of selected genes is subtracted as a penalty, so the result
    can be negative for large subsets with low accuracy.

    Parameters
    ----------
    chromosome : array-like of 0/1
        One gene per feature column; non-zero means the feature is selected.
    max_features : int, default 5
        Number of selected features allowed before the penalty applies.

    Returns
    -------
    float
        ``(accuracy - penalty) * 100`` rounded to two decimals.
    """
    # Accept lists/tuples as well as arrays; .shape is needed below.
    chromosome = np.asarray(chromosome)
    num_selected = np.sum(chromosome)

    fitness = evaluate_accuracy(chromosome, X, y)
    if num_selected > max_features:
        # Penalty grows quadratically with the share of features selected.
        fitness -= (num_selected / chromosome.shape[0]) ** 2

    return round(fitness * 100, 2)
    

In [4]:
# Sample chromosome: one gene per feature column of X, so the mask length
# always matches the loaded data instead of relying on a hard-coded count.
# The draw is unseeded, so the printed chromosome and fitness differ between runs.
chrom = np.zeros(X.shape[1], dtype=int)
chrom_rand = np.random.randint(0, 2, X.shape[1])
print(f"Chromosome --> {chrom_rand} \n   fitness --> {fitness_function(chrom_rand)}")

Chromosome --> [1 0 1 0 0 1 0 1 1 1 1 0 0] 
   fitness --> 64.06
