#### Description

This notebook uses <b>Random Search</b> to find good values for the following hyperparameters of the `GeneticAlgorithm` class from `GA_2_2` (they are passed to its constructor before `run()` is called):
* base_mutation_rate
* chromosomes
* islands
* num_parents
* gene_flow_rate

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import time
import random

from GA_2_2 import GeneticAlgorithm

In [2]:
# Control variables: GeneticAlgorithm settings held constant for every run
cnn_model_path = '../Models/CNN_6_1_2.keras'
masked_sequence = 'NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN'
max_length = 150
pop_size = 20
generations = 100
precision = 0.1

# Testing variables: size of the random search and where its output is collected
combinations = 10          # hyperparameter combinations sampled per target expression
run_per_combination = 10   # repeated GA runs for each sampled combination
results = []               # one dict per GA run; turned into a DataFrame after the search
target_expressions = [0, 1]

# Reproducibility: seed the generators so that Restart & Run All samples the
# same hyperparameter combinations every time.
# NOTE(review): GA_2_2 internals are not visible here; NumPy is seeded as well
# in case the algorithm draws from np.random - confirm which generator it uses.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

In [None]:
# Random search: for every target expression, sample `combinations` random
# hyperparameter sets and run the GA `run_per_combination` times with each,
# recording the absolute prediction error and the run time of every run.

# Start from an empty list so that re-running this cell does not append
# duplicate rows to results left over from a previous execution.
results = []

for target_expression in target_expressions:
    for combination_id in range(combinations):

        # Randomly sample a value for each hyperparameter under study
        # (these are the independent variables of the experiment)
        base_mutation_rate = random.uniform(0, 1)
        chromosomes = random.randint(1, 16)
        islands = random.randint(1, 16)
        num_parents = random.randint(1, 16)
        gene_flow_rate = random.uniform(0, 1)

        # NOTE(review): the saved output of this cell ends with
        # "IndexError: list index out of range" right after a sampled
        # combination is printed. Nothing in this cell indexes a list in a way
        # that can raise it, so it originates in code called from here
        # (presumably GeneticAlgorithm). Some sampled combinations may be
        # invalid for pop_size - confirm against GA_2_2 and constrain the
        # sampling ranges above if so.
        print(f'Running with base_mutation_rate={base_mutation_rate:.3f}, chromosomes={chromosomes}, islands={islands}, num_parents={num_parents}, gene_flow_rate={gene_flow_rate:.3f}', end='')

        # Errors of the runs belonging to this combination only
        combination_errors = []

        for run_id in range(run_per_combination):
            ga = GeneticAlgorithm(
                cnn_model_path=cnn_model_path,
                masked_sequence=masked_sequence,
                target_expression=target_expression,
                max_length=max_length,
                pop_size=pop_size,
                generations=generations,
                base_mutation_rate=base_mutation_rate,  # searched hyperparameter
                precision=precision,
                chromosomes=chromosomes,  # searched hyperparameter
                islands=islands,  # searched hyperparameter
                num_parents=num_parents,  # searched hyperparameter
                gene_flow_rate=gene_flow_rate,  # searched hyperparameter
                print_progress=False
            )

            # Time the run with a monotonic clock, which is unaffected by
            # system clock adjustments
            start_time = time.perf_counter()
            best_sequence, best_prediction = ga.run()
            run_time = time.perf_counter() - start_time

            error = abs(best_prediction - target_expression)
            combination_errors.append(error)

            # Record the results, including every sampled hyperparameter so
            # each one can be analysed afterwards
            results.append({
                'target_expression': target_expression,
                'combination_id': combination_id + 1,
                'run_id': run_id + 1,
                'base_mutation_rate': base_mutation_rate,
                'chromosomes': chromosomes,
                'num_parents': num_parents,
                'islands': islands,
                'gene_flow_rate': gene_flow_rate,
                'error': error,
                'run_time': run_time
            })

        # Print the error averaged over this combination's runs
        print(f' Avg. error={np.mean(combination_errors):.3f}')

# Convert results to a DataFrame for analysis
results_df = pd.DataFrame(results)

Running with base_mutation_rate=0.750, chromosomes=9, islands=4, num_parents=13, gene_flow_rate=0.573Avg. error=0.172
Running with base_mutation_rate=0.538, chromosomes=16, islands=13, num_parents=5, gene_flow_rate=0.166

IndexError: list index out of range

In [None]:
# Preview the first five recorded runs
results_df.head(n=5)