In [6]:
from pyeasyga.pyeasyga import GeneticAlgorithm
import random

genome_size = 100
population_size = 100

def create_individual(data):
    """Build one random bit-string genome.

    `data` exists only to satisfy the call signature used for this function
    (it is installed as a GeneticAlgorithm `create_individual` hook and is
    also called directly with None); the value is never read.

    Returns:
        A list of `genome_size` integers, each drawn with
        ``random.randint(0, 1)``.
    """
    genome = []
    for _ in range(genome_size):
        genome.append(random.randint(0, 1))
    return genome

In [66]:
def basic_population(mutation_probability, runs=20):
    """Evolve `runs` independent populations until one reaches a perfect genome.

    Every run builds a fresh GeneticAlgorithm with crossover disabled, creates
    its own random first generation, and then advances one generation at a
    time until ``ga.best_individual()[0]`` is no longer below `genome_size`.

    Args:
        mutation_probability: forwarded unchanged to GeneticAlgorithm.
        runs: number of independent runs. Defaults to 20, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A 3-tuple ``(generations, unique_individuals, total_evaluations)``:

        * generations -- list holding the generation count of each run
          (the first generation counts as 1);
        * unique_individuals -- number of distinct genomes that were passed
          to the fitness function across all runs;
        * total_evaluations -- ``sum(generations) * population_size``, i.e.
          a derived figure that assumes one fitness call per individual per
          generation rather than an actual call counter.

    Note:
        There is no generation cap: the inner loop only ends once the
        termination condition above is met.
    """
    results = []
    seen_genomes = set()

    def fitness(individual, data):
        # set.add() is already a no-op for known members, so no membership
        # test is needed before recording the genome.
        seen_genomes.add(tuple(individual))
        return sum(individual)

    for _ in range(runs):
        ga = GeneticAlgorithm(create_individual(None),
                              crossover_probability=0,
                              population_size=population_size,
                              mutation_probability=mutation_probability)
        ga.fitness_function = fitness
        ga.create_individual = create_individual

        generations = 1
        ga.create_first_generation()

        while ga.best_individual()[0] < genome_size:
            generations += 1
            ga.create_next_generation()

        results.append(generations)

    return results, len(seen_genomes), sum(results) * population_size

In [68]:
def ltee_population(mutation_probability, runs=20):
    """Evolve `runs` replicates that all start from one shared first generation.

    A single founder GeneticAlgorithm creates the first generation once.
    Each replicate then gets a freshly configured GeneticAlgorithm whose
    `current_generation` is initialised from that founder population, and is
    advanced until ``ga.best_individual()[0]`` is no longer below
    `genome_size`.

    Args:
        mutation_probability: forwarded unchanged to every GeneticAlgorithm.
        runs: number of replicates started from the shared population.
            Defaults to 20, the value that was previously hard-coded.

    Returns:
        A 3-tuple ``(generations, unique_individuals, total_evaluations)``:

        * generations -- list holding the generation count of each replicate
          (the shared starting generation counts as 1 for every replicate);
        * unique_individuals -- number of distinct genomes that were passed
          to the fitness function, founder generation included;
        * total_evaluations -- ``sum(generations) * population_size``.
          NOTE(review): the shared first generation contributes 1 to every
          replicate's count although `create_first_generation` is called
          only once -- confirm this is the intended accounting.

    Note:
        There is no generation cap: each replicate's loop only ends once the
        termination condition above is met.
    """
    import copy  # local import keeps this cell runnable on its own

    results = []
    seen_genomes = set()

    def fitness(individual, data):
        # set.add() is already a no-op for known members.
        seen_genomes.add(tuple(individual))
        return sum(individual)

    def new_ga():
        # Single place for the configuration shared by founder and replicates.
        ga = GeneticAlgorithm(create_individual(None),
                              crossover_probability=0,
                              population_size=population_size,
                              mutation_probability=mutation_probability)
        ga.fitness_function = fitness
        ga.create_individual = create_individual
        return ga

    founder = new_ga()
    founder.create_first_generation()
    starting_population = founder.current_generation

    for _ in range(runs):
        ga = new_ga()

        generations = 1
        # Hand each replicate its own deep copy so that nothing one
        # GeneticAlgorithm does to its generation can leak into the shared
        # starting point used by later replicates.
        ga.current_generation = copy.deepcopy(starting_population)

        while ga.best_individual()[0] < genome_size:
            generations += 1
            ga.create_next_generation()

        results.append(generations)

    return results, len(seen_genomes), sum(results) * population_size

In [71]:
def ltee_subpopulations(mutation_probability, founders=5, replicates_per_founder=4):
    """Evolve several founder populations, each replicated a few times.

    For every founder a GeneticAlgorithm creates one random first generation.
    That generation then seeds `replicates_per_founder` freshly configured
    GeneticAlgorithm instances, each advanced until
    ``ga.best_individual()[0]`` is no longer below `genome_size`.

    Args:
        mutation_probability: forwarded unchanged to every GeneticAlgorithm.
        founders: number of distinct starting populations. Defaults to 5,
            the value that was previously hard-coded.
        replicates_per_founder: number of replicates started from each
            founder population. Defaults to 4, the value that was previously
            hard-coded.

    Returns:
        A 3-tuple ``(generations, unique_individuals, total_evaluations)``:

        * generations -- flat list holding the generation count of every
          replicate, founder by founder (a founder's starting generation
          counts as 1 for each of its replicates);
        * unique_individuals -- number of distinct genomes that were passed
          to the fitness function, founder generations included;
        * total_evaluations -- ``sum(generations) * population_size``.
          NOTE(review): each founder generation contributes 1 to every one
          of its replicates although `create_first_generation` is called
          once per founder -- confirm this is the intended accounting.

    Note:
        There is no generation cap: each replicate's loop only ends once the
        termination condition above is met.
    """
    import copy  # local import keeps this cell runnable on its own

    results = []
    seen_genomes = set()

    def fitness(individual, data):
        # set.add() is already a no-op for known members.
        seen_genomes.add(tuple(individual))
        return sum(individual)

    def new_ga():
        # Single place for the configuration shared by founders and replicates.
        ga = GeneticAlgorithm(create_individual(None),
                              crossover_probability=0,
                              population_size=population_size,
                              mutation_probability=mutation_probability)
        ga.fitness_function = fitness
        ga.create_individual = create_individual
        return ga

    for _ in range(founders):
        founder = new_ga()
        founder.create_first_generation()
        starting_population = founder.current_generation

        for _ in range(replicates_per_founder):
            ga = new_ga()

            generations = 1
            # Hand each replicate its own deep copy so that nothing one
            # GeneticAlgorithm does to its generation can leak into the
            # starting point shared with this founder's other replicates.
            ga.current_generation = copy.deepcopy(starting_population)

            while ga.best_individual()[0] < genome_size:
                generations += 1
                ga.create_next_generation()

            results.append(generations)

    return results, len(seen_genomes), sum(results) * population_size

In [109]:
# Run every method `trials` times at every mutation rate and collect one
# record per (mutation rate, trial, method) combination in `results`.
results = []
trials = 20

methods = [basic_population, ltee_population, ltee_subpopulations]
mutation_rates = [0.01, 0.02, 0.03, 0.04]

# enumerate() supplies the 1-based progress counters directly; looking the
# position up with list.index() is a linear search on every iteration and
# reports the wrong number if a rate or method is ever listed twice.
for rate_number, mutation_rate in enumerate(mutation_rates, start=1):
    print(f"Mutation rate {rate_number}/{len(mutation_rates)}")
    for trial_number in range(1, trials + 1):
        print(f"\tTrial {trial_number}/{trials}")
        for method_number, method in enumerate(methods, start=1):
            print(f"\t\tMethod {method_number}/{len(methods)}")
            # Each method returns (generations, unique genomes, evaluations).
            generations, unique_individuals, total_fitness_invocations = method(mutation_rate)
            results.append(dict(method=method.__name__,
                                mutation_rate=mutation_rate,
                                generations=generations,
                                unique_individuals=unique_individuals,
                                total_fitness_invocations=total_fitness_invocations))

Mutation rate 1/4
	Trial 1/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 2/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 3/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 4/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 5/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 6/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 7/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 8/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 9/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 10/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 11/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 12/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 13/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 14/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 15/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 16/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 17/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 18/20
		Method 1/3
		Method 2/3
		Method 3/3
	Trial 19/20
		Method 1/3
		Method 2/3
		Method 3/3
	Tr

In [110]:
import pandas as pd

In [111]:
# `results` is a list of per-run record dicts: one row per record, one column
# per key.
df = pd.DataFrame(results)

In [112]:
# Persist the collected records (index column included) to results.csv.
df.to_csv(path_or_buf="results.csv")

In [140]:
# DataFrame.from_csv is deprecated (and removed in pandas 1.0); read_csv with
# index_col=0 restores the index column that to_csv wrote out.
df = pd.read_csv("results.csv", index_col=0)

  """Entry point for launching an IPython kernel.


In [141]:
import ast

In [142]:
# After the CSV round trip each `generations` entry is the text form of a
# list; parse it back into a real list. Non-string values are passed through
# untouched so this cell can be re-run without literal_eval raising on
# entries that were already parsed.
df["generations"] = df["generations"].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [114]:
from scipy.stats import f_oneway

In [268]:
# One-way ANOVA: does total_fitness_invocations differ between the three methods?
print(f_oneway(
    df.loc[df["method"] == "basic_population", "total_fitness_invocations"],
    df.loc[df["method"] == "ltee_population", "total_fitness_invocations"],
    df.loc[df["method"] == "ltee_subpopulations", "total_fitness_invocations"],
))

F_onewayResult(statistic=0.03926987584173631, pvalue=0.961497445800861)


In [267]:
# One-way ANOVA: does unique_individuals differ between the three methods?
print(f_oneway(
    df.loc[df["method"] == "basic_population", "unique_individuals"],
    df.loc[df["method"] == "ltee_population", "unique_individuals"],
    df.loc[df["method"] == "ltee_subpopulations", "unique_individuals"],
))

F_onewayResult(statistic=69.52552282921005, pvalue=1.741538692690228e-24)


In [269]:
from statsmodels.stats.multicomp import MultiComparison
# Tukey HSD post-hoc test: pairwise comparison of unique_individuals, grouped
# by method.
print(MultiComparison(data=df["unique_individuals"].to_numpy(),
                      groups=df["method"].to_numpy()).tukeyhsd())

             Multiple Comparison of Means - Tukey HSD,FWER=0.05             
     group1             group2        meandiff    lower      upper    reject
----------------------------------------------------------------------------
basic_population   ltee_population   -2103.4375 -2538.0506 -1668.8244  True 
basic_population ltee_subpopulations  -1523.4   -1958.0131 -1088.7869  True 
ltee_population  ltee_subpopulations  580.0375   145.4244  1014.6506   True 
----------------------------------------------------------------------------
