In [9]:
import numpy as np
import random
import math
from sklearn.model_selection import train_test_split


# Seed both generators: NumPy produces the synthetic data below, while the
# standard-library `random` module drives the evolutionary operators and the
# simulated fitness scores. Seeding only NumPy leaves the run irreproducible.
np.random.seed(42)
random.seed(42)

# Synthetic stand-in data: 1000 samples x 5 features, uniform on [0, 1).
X = np.random.rand(1000, 5)

# Random binary labels (0 or 1), one per sample.
Y = np.random.randint(0, 2, size=1000)

# Hold out 20% of the samples for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# One gene name per feature column ("X1", "X2", ...), derived from the width
# of X so the chromosome length follows the data instead of a fixed list.
genes = [f"X{column + 1}" for column in range(X.shape[1])]


def fitness_function(expression, X, Y):
    """Return a simulated accuracy score for a chromosome.

    This is a placeholder: ``expression``, ``X`` and ``Y`` are accepted but
    not used, and the score is drawn uniformly at random between 0.5 and 1.0.
    """
    return random.uniform(0.5, 1.0)


def generate_expression():
    """Build one chromosome: every entry of ``genes`` once, in random order."""
    chromosome_length = len(genes)
    return random.sample(genes, k=chromosome_length)


def mutate(expression):
    """Swap two randomly chosen genes of ``expression`` in place.

    Args:
        expression: Mutable sequence of genes (a chromosome).

    Returns:
        The same ``expression`` object. A chromosome with fewer than two
        genes has nothing to swap and is returned unchanged rather than
        letting ``random.sample`` raise ``ValueError``.
    """
    if len(expression) < 2:
        return expression
    # sample() yields two distinct positions, so a swap always takes place.
    first, second = random.sample(range(len(expression)), 2)
    expression[first], expression[second] = expression[second], expression[first]
    return expression


def crossover(parent1, parent2):
    """Combine two parent chromosomes into one child.

    The child starts with the first ``cut`` genes of ``parent1`` and is
    completed with the genes of ``parent2``, in ``parent2`` order, that are
    not already in that prefix.

    Args:
        parent1: Chromosome supplying the leading genes.
        parent2: Chromosome supplying the remaining genes.

    Returns:
        A new list; neither parent is modified.
    """
    upper = len(parent1) - 2
    # randint(1, upper) is only valid when upper >= 1 (three or more genes).
    # Shorter parents use the single possible cut point instead of raising.
    cut = random.randint(1, upper) if upper >= 1 else 1
    # Slice once rather than rebuilding the prefix for every membership test.
    head = parent1[:cut]
    return head + [gene for gene in parent2 if gene not in head]


def generate_population(pop_size):
    """Create ``pop_size`` chromosomes, each from its own ``generate_expression`` call."""
    population = []
    for _ in range(pop_size):
        population.append(generate_expression())
    return population


def select(population, X, Y):
    """Return the chromosome with the highest fitness score.

    Every chromosome is scored once with ``fitness_function``. If several
    tie for the top score, the earliest one in ``population`` is returned.
    The population is left in its original order; nothing is sorted in place.

    Args:
        population: Non-empty list of chromosomes.
        X: Feature data forwarded to ``fitness_function``.
        Y: Label data forwarded to ``fitness_function``.

    Raises:
        IndexError: If ``population`` is empty.
    """
    if not population:
        raise IndexError("cannot select from an empty population")
    return max(population, key=lambda expression: fitness_function(expression, X, Y))


def _best_scored(population, fitness_values, best_solution, best_fitness):
    """Return the best (solution, fitness) pair after considering one scored population."""
    for expression, fitness_val in zip(population, fitness_values):
        if fitness_val > best_fitness:
            # Copy so later in-place changes to the chromosome cannot alter the result.
            best_solution, best_fitness = list(expression), fitness_val
    return best_solution, best_fitness


def gene_expression_algorithm(X, Y, pop_size=10, generations=10, mutation_rate=0.3):
    """Evolve a population of chromosomes and return the best one found.

    Every population (the initial one and each generation's offspring) is
    scored exactly once with ``fitness_function``. Those scores are the ones
    printed and the ones used to track the best chromosome, so the reported
    best fitness always belongs to the reported chromosome.

    Args:
        X: Feature data forwarded to ``fitness_function`` and ``select``.
        Y: Label data forwarded to ``fitness_function`` and ``select``.
        pop_size: Number of chromosomes in each generation.
        generations: Number of breeding rounds to run.
        mutation_rate: Probability that a newly bred child is mutated.

    Returns:
        ``(best_solution, best_fitness)``. If no chromosome ever scores above
        0 (for example when ``pop_size`` is 0), this is ``(None, 0)``.
    """
    print("=== Gene Expression Algorithm for Heart Disease Risk Detection ===")
    print(f"Population Size: {pop_size}")
    print(f"Generations: {generations}")
    print(f"Mutation Rate: {mutation_rate}\n")

    population = generate_population(pop_size)
    fitness_values = [fitness_function(expression, X, Y) for expression in population]
    best_solution, best_fitness = _best_scored(population, fitness_values, None, 0)

    for gen in range(generations):
        print(f"\n--- Generation {gen + 1} ---")
        # Report the scores first: selection below may reorder the population,
        # after which the scores would no longer line up with their chromosomes.
        for i, (expression, fitness_val) in enumerate(zip(population, fitness_values)):
            print(f"Expression {i + 1}: {expression}  | Fitness = {round(fitness_val, 4)}")

        # NOTE(review): both parents come from the same top-pick selection. With
        # a deterministic fitness they would be the same chromosome and
        # crossover would only copy it; consider picking two distinct parents.
        new_population = []
        for _ in range(pop_size):
            parent1 = select(population, X, Y)
            parent2 = select(population, X, Y)
            child = crossover(parent1, parent2)

            if random.random() < mutation_rate:
                child = mutate(child)

            new_population.append(child)

        population = new_population

        # Score the offspring once; the same values are printed at the start
        # of the next generation.
        fitness_values = [fitness_function(expression, X, Y) for expression in population]
        best_solution, best_fitness = _best_scored(
            population, fitness_values, best_solution, best_fitness
        )

    print("\n=== Final Best Solution ===")
    print(f"Best Expression (Chromosome): {best_solution}")
    print(f"Best Fitness (Accuracy): {round(best_fitness, 4)}")

    return best_solution, best_fitness


# Run the search on the training split: 6 chromosomes, 5 generations.
best_solution, best_fitness = gene_expression_algorithm(
    X_train,
    Y_train,
    pop_size=6,
    generations=5,
    mutation_rate=0.3,
)


def evaluate_model_on_test_data(best_solution, X_test, Y_test):
    """Return a simulated test accuracy.

    This is a placeholder: none of the arguments are used, and the value is
    drawn uniformly at random between 0.4 and 1.0.
    """
    return random.uniform(0.4, 1.0)


# Obtain the (simulated) test accuracy for the best chromosome and print it
# rounded to four decimals.
test_accuracy = evaluate_model_on_test_data(
    best_solution=best_solution,
    X_test=X_test,
    Y_test=Y_test,
)
print(f"\nTest Accuracy: {round(test_accuracy, 4)}")


=== Gene Expression Algorithm for Heart Disease Risk Detection ===
Population Size: 6
Generations: 5
Mutation Rate: 0.3


--- Generation 1 ---
Expression 1: ['X3', 'X1', 'X2', 'X5', 'X4']  | Fitness = 0.7214
Expression 2: ['X4', 'X5', 'X3', 'X2', 'X1']  | Fitness = 0.9521
Expression 3: ['X2', 'X5', 'X4', 'X1', 'X3']  | Fitness = 0.686
Expression 4: ['X2', 'X5', 'X1', 'X4', 'X3']  | Fitness = 0.8025
Expression 5: ['X3', 'X5', 'X2', 'X1', 'X4']  | Fitness = 0.9556
Expression 6: ['X4', 'X1', 'X5', 'X3', 'X2']  | Fitness = 0.5817

--- Generation 2 ---
Expression 1: ['X3', 'X5', 'X4', 'X1', 'X2']  | Fitness = 0.5532
Expression 2: ['X2', 'X4', 'X5', 'X3', 'X1']  | Fitness = 0.5429
Expression 3: ['X3', 'X1', 'X2', 'X4', 'X5']  | Fitness = 0.8626
Expression 4: ['X4', 'X5', 'X3', 'X2', 'X1']  | Fitness = 0.9263
Expression 5: ['X3', 'X1', 'X2', 'X4', 'X5']  | Fitness = 0.9867
Expression 6: ['X3', 'X2', 'X5', 'X4', 'X1']  | Fitness = 0.7848

--- Generation 3 ---
Expression 1: ['X4', 'X3', 'X5', '