<a href="https://colab.research.google.com/github/ShettyTanya/bis_lab/blob/main/exp7_gene_expression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Step 1: Define the Problem (Feature Selection Objective)
def objective_function(selected_features, X_train, y_train, X_test, y_test):
    """Score a feature subset by the accuracy of a linear SVC trained on it.

    Args:
        selected_features: Column indices to keep; used to index the second
            axis of ``X_train`` and ``X_test``.
        X_train: Training samples (presumably a 2-D NumPy array, since it is
            indexed as ``X_train[:, selected_features]``).
        y_train: Training labels.
        X_test: Evaluation samples, indexed the same way as ``X_train``.
        y_test: Evaluation labels.

    Returns:
        The accuracy of the fitted classifier on the evaluation samples;
        higher is better.
    """
    classifier = SVC(kernel='linear')

    # Fit on the chosen columns only
    classifier.fit(X_train[:, selected_features], y_train)

    # Score the predictions made from the same columns of the evaluation set
    predictions = classifier.predict(X_test[:, selected_features])
    return accuracy_score(y_test, predictions)

# Step 2: Initialize Parameters
# Individuals kept in every generation
population_size = 30
# Genes per individual: one 0/1 flag per dataset feature
num_features = 4
# Probability that an offspring gets a single gene flipped
mutation_rate = 0.2
# Probability that a pair of parents is recombined
crossover_rate = 0.9
# Generations the search runs for
num_generations = 100
# Allowed gene values (0 = feature dropped, 1 = feature kept).
# NOTE: not read by any of the code visible in this cell.
feature_range = (0, 1)

# Step 3: Initialize Population (Random gene sequences)
def initialize_population(population_size, num_features):
    """Build the starting population of random binary feature masks.

    Args:
        population_size: Number of individuals to create.
        num_features: Number of genes (0/1 flags) in each individual.

    Returns:
        A list of ``population_size`` lists, each holding ``num_features``
        integers drawn uniformly from {0, 1} (1 = feature selected).
    """
    return [
        [random.randint(0, 1) for _gene in range(num_features)]
        for _member in range(population_size)
    ]

# Step 4: Evaluate Fitness (Objective Function)
def evaluate_fitness(population, X_train, y_train, X_test, y_test):
    """Compute one fitness value per individual in the population.

    Each individual is a sequence of 0/1 genes; the positions holding 1 are
    the feature columns handed to ``objective_function``. An individual that
    selects no feature at all gets fitness 0 without training a model.

    Many individuals share the same mask (there are only 2**n distinct
    masks), so each distinct feature subset is scored once per call and the
    result is reused for its duplicates.

    Args:
        population: Iterable of individuals (sequences of 0/1 genes).
        X_train: Training samples, forwarded to ``objective_function``.
        y_train: Training labels, forwarded to ``objective_function``.
        X_test: Evaluation samples, forwarded to ``objective_function``.
        y_test: Evaluation labels, forwarded to ``objective_function``.

    Returns:
        A list of fitness values in the same order as ``population``.
    """
    scores_by_subset = {}
    fitness_values = []
    for individual in population:
        subset = tuple(idx for idx, gene in enumerate(individual) if gene == 1)
        if not subset:
            # No features selected leads to a poor fitness value
            fitness_values.append(0)
            continue
        if subset not in scores_by_subset:
            # Pass a list, as the original did, so array column indexing
            # behaves exactly as before.
            scores_by_subset[subset] = objective_function(
                list(subset), X_train, y_train, X_test, y_test
            )
        fitness_values.append(scores_by_subset[subset])
    return fitness_values

# Step 5: Selection (Roulette Wheel Selection)
def selection(population, fitness_values):
    """Pick two parents by fitness-proportionate (roulette-wheel) sampling.

    Sampling is done with replacement, so the same individual may be
    returned twice. The returned items are the objects stored in
    ``population``, not copies.

    Args:
        population: Sequence of individuals to draw from.
        fitness_values: Non-negative fitness of each individual, aligned
            with ``population``.

    Returns:
        A list containing the two selected individuals.
    """
    # When every individual scored 0 there is nothing to weight by (and
    # normalising would divide by zero), so fall back to a uniform draw.
    if sum(fitness_values) <= 0:
        return random.choices(population, k=2)

    # random.choices takes relative weights, so no manual normalisation.
    return random.choices(population, weights=fitness_values, k=2)

# Step 6: Crossover (Single-point crossover)
def crossover(parent1, parent2, rate=None):
    """Recombine two parents with single-point crossover.

    With probability ``rate`` the parents are cut at one interior point and
    their tails are swapped; otherwise the offspring are plain copies of the
    parents. The returned offspring are always new sequences, so mutating
    them in place never changes the parents.

    Args:
        parent1: First parent genome (sliceable sequence).
        parent2: Second parent genome, expected to be as long as ``parent1``.
        rate: Crossover probability in [0, 1]. Defaults to the module-level
            ``crossover_rate`` when omitted.

    Returns:
        A tuple ``(offspring1, offspring2)``.
    """
    if rate is None:
        rate = crossover_rate

    recombine = random.random() < rate
    # A genome shorter than 2 genes has no interior cut point.
    if not recombine or len(parent1) < 2:
        return parent1[:], parent2[:]

    # Cut strictly inside the genome: a cut at 0 would merely swap the two
    # parents without mixing any genes.
    crossover_point = random.randint(1, len(parent1) - 1)
    offspring1 = parent1[:crossover_point] + parent2[crossover_point:]
    offspring2 = parent2[:crossover_point] + parent1[crossover_point:]
    return offspring1, offspring2

# Step 7: Mutation (Random mutation of genes)
def mutation(offspring, rate=None):
    """Flip at most one randomly chosen gene of ``offspring``.

    With probability ``rate`` a single position is picked uniformly and its
    bit is inverted (0 -> 1, 1 -> 0). The genome is modified in place and
    also returned. An empty genome is returned unchanged.

    Args:
        offspring: Mutable sequence of 0/1 genes.
        rate: Mutation probability in [0, 1]. Defaults to the module-level
            ``mutation_rate`` when omitted.

    Returns:
        The same ``offspring`` object, possibly with one gene flipped.
    """
    if rate is None:
        rate = mutation_rate

    # Draw first so the random stream is consumed as before; the emptiness
    # check avoids asking for a position in a zero-length genome.
    if random.random() < rate and offspring:
        mutation_point = random.randrange(len(offspring))
        offspring[mutation_point] = 1 - offspring[mutation_point]
    return offspring

# Step 8: Gene Expression (Direct mapping in feature selection)
def gene_expression(individual):
    """Translate a 0/1 gene sequence into the list of selected feature indices.

    Args:
        individual: Sequence of genes; a gene equal to 1 marks its position
            as a selected feature.

    Returns:
        The positions, in ascending order, whose gene equals 1.
    """
    # The mapping is direct: position i is expressed when its gene is 1
    return [position for position, gene in enumerate(individual) if gene == 1]

# Step 9: Iterate through generations (Genetic Algorithm loop)
def genetic_algorithm(X_train, y_train, X_test, y_test):
    """Run the genetic search for a feature mask that maximises accuracy.

    Starts from a random population, then for ``num_generations`` rounds
    scores every individual, remembers the best one seen so far, and breeds
    the next population through selection, crossover and mutation.

    NOTE: fitness is measured on ``X_test``/``y_test``, the same data whose
    score is returned, so the reported accuracy is an optimistic estimate
    rather than a held-out one.

    Args:
        X_train: Training samples used to fit each candidate model.
        y_train: Training labels.
        X_test: Samples used to score each candidate model.
        y_test: Labels used to score each candidate model.

    Returns:
        A tuple ``(best_solution, best_fitness)``: the best 0/1 feature mask
        found and its fitness. ``best_solution`` is ``None`` when no
        individual ever scored above 0.
    """
    population = initialize_population(population_size, num_features)
    best_solution = None
    best_fitness = 0

    for _generation in range(num_generations):
        fitness_values = evaluate_fitness(population, X_train, y_train, X_test, y_test)

        # Track the best solution in the population. Store a snapshot: the
        # individual itself may be mutated in place in a later generation,
        # which would leave the mask out of sync with best_fitness.
        for individual, fitness in zip(population, fitness_values):
            if fitness > best_fitness:
                best_fitness = fitness
                best_solution = list(individual)

        # Breed pairs until the next generation is full. Looping on the
        # actual size (and trimming the surplus child) keeps the population
        # at population_size even when that number is odd.
        new_population = []
        while len(new_population) < population_size:
            parent1, parent2 = selection(population, fitness_values)
            offspring1, offspring2 = crossover(parent1, parent2)
            # Mutate private copies so a child that is still the parent
            # object (no crossover, or the same parent drawn twice) cannot
            # alter the current population or its sibling.
            new_population.append(mutation(list(offspring1)))
            new_population.append(mutation(list(offspring2)))

        population = new_population[:population_size]

    return best_solution, best_fitness

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for scoring; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Search for a good feature subset with the genetic algorithm
best_solution, best_fitness = genetic_algorithm(X_train, y_train, X_test, y_test)

# Report the winning 0/1 feature mask and the accuracy it reached
print("Best Feature Set: ", best_solution)
print("Best Fitness (Accuracy): ", best_fitness)


Best Feature Set:  [0, 1, 1, 1]
Best Fitness (Accuracy):  1.0
