In [1]:
import time
from itertools import combinations
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from GA_params_class.GeneticAlgorithm import GeneticAlgorithm
from function_module import *

In [2]:
# --- Experiment configuration -------------------------------------------------
# Path handed to GeneticAlgorithm as `cnn_model_path`.
cnn_model_path = '../../Models/CNN_6_1_2.keras'

# Sequence template handed to GeneticAlgorithm as `masked_sequence`.
# NOTE(review): the run of N characters presumably marks the positions the GA
# is free to fill in -- confirm against the GeneticAlgorithm implementation.
masked_sequence = 'AATACTAGAGGTCTTCCGACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTGTGGGCGGGAAGACAACTAGGGG'

# Target expression values the sweep iterates over.
target_expressions = [0, 0.5, 1]

# Forwarded unchanged to GeneticAlgorithm.
precision, verbose = None, 0

# lineages:   number of ga.run() calls per parameter combination.
# test_cases: number of evenly spaced elitist_rate values sampled from [0, 1].
lineages, test_cases = 30, 10

In [None]:
# Sweep experiment: for every target expression and every selection method,
# vary elitist_rate, run the GA `lineages` times per configuration, and record
# the prediction error and wall-clock time of each run.
selection_methods = [
    'boltzmann',
    'rank_based',
    'roulette',
    'roulette_linear_scaling',
    'steady_state',
    'sus',
    'tournament',
    'tournament_without_replacement',
    'tournament_pop',
    'truncation',
]
range_elitist_rate = np.linspace(0, 1, test_cases)

# Create the output directory up front so a missing folder fails fast instead
# of after the whole sweep has finished.
output_path = Path('Data/selection_methods/elitist_rates.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# One progress tick per ga.run() call.
total_combinations = len(target_expressions) * len(selection_methods) * len(range_elitist_rate) * lineages

results = []

# The context manager closes the bar even if a run raises; tqdm itself renders
# elapsed time and ETA, so no manual bookkeeping is needed.
with tqdm(total=total_combinations, desc="Processing combinations", position=0) as progress_bar:
    for target_expression in target_expressions:
        for selection_method in selection_methods:
            for elitist_rate in range_elitist_rate:
                ga = GeneticAlgorithm(
                    cnn_model_path=cnn_model_path,
                    masked_sequence=masked_sequence,
                    target_expression=target_expression,
                    precision=precision,
                    verbose=verbose,
                    selection=selection_method,  # Independent variable (swept)
                    elitist_rate=elitist_rate,  # Independent variable (swept)
                )
                for _ in range(lineages):
                    # Time a single run with a monotonic clock.
                    start_time = time.perf_counter()
                    best_sequences, best_predictions = ga.run()
                    run_time = time.perf_counter() - start_time

                    # Record one row per returned prediction. The timer wraps
                    # exactly one ga.run() call, so run_time is already per run
                    # and must not be divided by `lineages` again.
                    for sequence, prediction in zip(best_sequences, best_predictions):
                        results.append({
                            'target_expression': target_expression,
                            'selection_method': selection_method,
                            'elitist_rate': elitist_rate,
                            'error': abs(prediction - target_expression),
                            'run_time': run_time,
                        })

                    progress_bar.update(1)

results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

In [None]:
# Draw one heatmap per target expression, passing 'selection_method' and
# 'elitist_rate' to heatmap() along with the rows recorded for that target.
for target_expression in target_expressions:
    matches_target = results_df['target_expression'] == target_expression
    filtered_df = results_df.loc[matches_target]
    heatmap(
        filtered_df,
        target_expression,
        'selection_method',
        'elitist_rate',
    )