### Experimental Comparison Notebook

In [1]:
# Imports
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from tetris_main import *
from utils import *
# The explicit imports below stay after the wildcard imports so they win any name clash.
import itertools
import random
from library.genetic_algorithm import gao
import os, time, copy, pickle

We would like to retain:
* a) Universal parameters: grid shape, pieces list, pop_size, n_elites, iterations, generations.
* b) Configuration Parameters
* c) Results of ABF
* d) SR Ratio Results
* e) one example of the fitness and representation of the best individual achieved

In [2]:
# Folder (under the current working directory) that this comparison's results are meant to go to.
_results_root = os.path.join(os.getcwd(), "comparison_results")
dir_path = os.path.join(_results_root, "0")

# Seconds since the epoch; later cells embed this value in their output file names.
timestamp = time.time()

##### Parameter Setting

In [17]:
# Set the parameters here for the configurations to compare, then run the entire notebook!

# Option for a medium sized grid, with 9 pieces.
grid_shape = (8, 12)
max_fitness = grid_shape[0] * grid_shape[1]
pieces_list = pieces_generator(grid_shape, rotation=False)

# Parameters for GA. These are the parameters that must be the same for this type of comparison.
# (According to the booklet)
pop_size = 20
n_elites = 2
iterations = 30  # The number of runs | iterations.
generations = 100

# These are the parameters that correspond and differentiate configurations to test.
# Each list holds the candidate values of one parameter; every combination of them
# becomes one configuration, so adding a value to a list widens the comparison.
selection_types = ["tournament", "fps"]
crossover_types = ["pmx", "cycle"]
crossover_probabilities = [1.0]
mutation_types = ["swap"]
mutation_probabilities = [0.4]
mutation_counts = [1]
rotation_mutation_probabilities = [0.25]
adoption_probabilities = [0.5]

# What GA configs to test.
# itertools.product varies the right-most list fastest, which is the same order the
# equivalent nested for-loops (left-most list outermost) would produce.
GA_configs_list = []
for counter, (sel_type, co_type, p_co, mut_type, p_mut, n_mut, p_rot, p_adpt) in enumerate(
    itertools.product(
        selection_types,
        crossover_types,
        crossover_probabilities,
        mutation_types,
        mutation_probabilities,
        mutation_counts,
        rotation_mutation_probabilities,
        adoption_probabilities,
    )
):
    GA_configs_list.append(
        {
            # The running index prefix keeps names unique and sortable in creation order.
            "name": f"{counter}_{sel_type}_{co_type}[{p_co}]_{mut_type}[{p_mut},{n_mut}]_rot[{p_rot}]_adpt[{p_adpt}]",
            "params": {
                "generations": generations,
                "selection_type": sel_type,
                # NOTE(review): hard-coded and equal to pop_size above; confirm this is intended.
                "tournament_size": 20,
                "crossover_type": co_type,
                "p_crossover": p_co,
                "mutation_type": mut_type,
                "p_mutation": p_mut,
                "n_mutations": n_mut,
                "p_rot_mut": p_rot,
                "p_adoption": p_adpt,
                "hc_hardstop": 0,
            },
        }
    )

# This is optional setting, since the default will be zero for all.
rotations = ["0"] * len(pieces_list)
print(len(GA_configs_list))

4


---
##### Initial Visualization

In [18]:
# Preview of the grid and the pieces before any GA run.

# The preview individual is generated automatically: each piece is joined with its rotation code.
vis_individual = [piece + rotation for piece, rotation in zip(pieces_list, rotations)]

grid, pieces_coordinates = tetrimino_fitter(vis_individual, grid_shape)

# Fitness is reported as 0 for this preview; both save names are None, so presumably
# nothing is written to disk (confirm against plot_grid).
plot_grid(
    grid=grid,
    pieces_coordinates=pieces_coordinates,
    ind_fitness=0,
    save_html_name=None,
    save_png_name=None,
    marker_size=20,
    width=600,
    height=600,
)

---
#### GA algorithm runs

In [19]:
# Containers filled by the GA runs cell:
#   fitness_dict     - fitness history of each run, keyed by run index
#   ABF_dict         - Average Best Fitness series, keyed by configuration name
fitness_dict, ABF_dict = {}, {}

# One (initially empty) list of best individuals per configuration, keyed by configuration name.
best_individuals = dict((ga_config["name"], []) for ga_config in GA_configs_list)

In [20]:
print("initializing individuals")
# For each run, have a specific population in all configs: run k of every
# configuration starts from (a deep copy of) the same initial individuals.
individuals_iters = [
    [
        generate_individual(pieces_list, grid_shape, hc_hardstop=0)
        for _ in range(pop_size)
    ]
    for _ in range(iterations)
]

# Drop results of any previous execution of this cell so that re-running it
# (possibly with a different config list) never mixes old and new entries.
ABF_dict.clear()

for i, GA_config in enumerate(GA_configs_list):
    config_name = GA_config["name"]
    print(f"[{i/len(GA_configs_list)*100}%] config: {config_name}")
    print("Run:", end=" ")

    # Per-configuration state starts empty: without this, re-running the cell would
    # append a second batch of best individuals, and histories of runs that no longer
    # exist (e.g. after lowering `iterations`) would leak into the average below.
    fitness_dict.clear()
    best_individuals[config_name] = []

    for iteration in range(iterations):
        # Initialize individuals and the population.
        print(f"{iteration}", end=" - ")
        pop = Population(
            copy.deepcopy(individuals_iters[iteration]), "max", n_elites, pieces_list, grid_shape
        )
        gao.GAO(
            pop, verbose=False, **GA_config["params"],
        )
        # save fitness history each iteration.
        fitness_dict[iteration] = pop.fitness_history

        # Save the best individual of the iteration. With this construction, they will be easy to find later.
        # NOTE(review): assumes pop.elites[0] is the fittest elite - confirm against Population.
        best_individuals[config_name].append(pop.elites[0])
    print()
    # Build a dataframe / table with fitness per generation (one column per run).
    df = pd.DataFrame(fitness_dict)

    # Record the Average Best Fitness (mean over runs, per generation) in the respective dict.
    ABF_dict[config_name] = df.mean(axis=1)



initializing individuals
[0.0%] config: 0_tournament_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]
Run: 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 
[25.0%] config: 1_tournament_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]
Run: 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 
[50.0%] config: 2_fps_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]
Run: 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 
[75.0%] config: 3_fps_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]
Run: 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 


---
#### Average Best Fitness Comparison

In [21]:
# Lowest (rounded) Average Best Fitness of the first configuration.
# Computed from ABF_dict because ABF_df is only assigned in a later cell, so reading it
# here fails on a fresh top-to-bottom run. `.iloc[0]` selects the first column's minimum
# by position; an integer key in `[...]` on a label-indexed Series is deprecated.
np.round(pd.DataFrame(ABF_dict)).astype(int).min().iloc[0]

7556

In [36]:
# Display the Average Best Fitness table (one row per generation, one column per configuration).
# NOTE(review): ABF_df is assigned in the "Output a DF with the ABF comparison" cell further
# down; on a fresh top-to-bottom run the name does not exist yet when this cell executes.
ABF_df

Unnamed: 0,Tournament PMX,Tournament CycleCX,FPS PMX,FPS CycleCX
0,7253,7253,7253,7253
1,7550,7542,7472,7449
2,7696,7763,7682,7632
3,7875,7885,7810,7712
4,7975,7891,7854,7753
...,...,...,...,...
95,8760,8776,8508,8491
96,8760,8776,8511,8491
97,8760,8777,8511,8494
98,8761,8798,8511,8494


In [43]:
# Output a DF with the ABF comparison.
timestamp = time.time()

# The files below are written into these folders; create them on a fresh checkout
# instead of failing when they are missing.
for output_dir in ("configs", "graphs"):
    os.makedirs(output_dir, exist_ok=True)

# One column per configuration (in GA_configs_list order), rounded to whole fitness points.
ABF_df = np.round(pd.DataFrame(ABF_dict)).astype(int)
# Human-readable labels; this list must have exactly one entry per configuration, in order.
ABF_df.columns = ["Tournament PMX", "Tournament CycleCX", "FPS PMX", "FPS CycleCX"]
plot_fitness(
    ABF_df,
    f"configs/final_abf_graph_{timestamp}",
    save_png_name=f"configs/final_abf_graph_{timestamp}",
    width=800,
    height=400,
    # `grid` comes from the visualization cell.
    max_fitness=grid.size * 100,
    filename="configs/final_graph",
    save_json=True,
)
fig = px.line(
    ABF_df,
    labels={
        "index": "Generation",
        "value": "Average Best Fitness",
        "variable": "Configuration",
    },
    template="plotly_white",
)
# Reuse the cell's timestamp so every artefact of this run carries the same tag.
fig.write_image(f"graphs/ABF_graph_{timestamp}.png", scale=2)
fig.show()


In [23]:
# Bundle everything needed to reproduce / inspect this comparison into one dict:
#   "general"      - shared settings plus the ABF table
#   <config name>  - that configuration's name and params, plus its best individuals
master_dict = {
    "general": {
        "pop_size": pop_size,
        "generations": generations,
        "grid_shape": grid_shape,
        "iterations": iterations,
        "pieces_list": pieces_list,
        "ABF_df": ABF_df,
    },
}

for config in GA_configs_list:
    # Store a shallow copy: adding "best_individuals" to the original entry would
    # silently modify GA_configs_list for every cell that runs afterwards.
    master_dict[config["name"]] = {
        **config,
        "best_individuals": best_individuals[config["name"]],
    }

# Make sure the target folder exists before writing the pickle.
os.makedirs("configs", exist_ok=True)
with open(f"configs/fsx_master_dict{timestamp}.pkl", "wb") as f:
    pickle.dump(master_dict, f)

---
#### Successful Runs / Performed Runs

In [24]:
# Output a DF with the best individuals per run and configuration.
best_ind_df = pd.DataFrame(best_individuals)

# The same DF with the fitness values only. Built straight from the individuals so it
# does not rely on DataFrame.applymap, which newer pandas versions deprecate.
best_ind_df_fit = pd.DataFrame(
    {
        config_name: [individual.fitness for individual in individuals]
        for config_name, individuals in best_individuals.items()
    }
).astype(int)

# Register max possible fitness, as well as best fitness achieved.
max_fitness = int(grid.size * 100)
best_fitness = int(best_ind_df_fit.max().max())
print(f"The max possible fitness was {max_fitness}.\nThe best fitness achieved was {best_fitness}.")


def _success_ratio(target_fitness):
    """Return, per configuration, the share of runs whose best fitness equals target_fitness.

    Masking leaves NaN wherever the fitness differs, and `count()` skips NaN, so the
    count is the number of matching runs in each column. Rounded to 2 decimals.
    """
    return np.round(best_ind_df[best_ind_df_fit == target_fitness].count() / iterations, 2)


# Print the SR / PR ratio to 2 decimals.
print(f"Total runs: {iterations}\nRatio of Successful Runs / Performed Runs:")
print(_success_ratio(max_fitness))
if max_fitness != best_fitness:
    print("Max fitness not achieved. Using the same ratio for best fitness.")
    print(_success_ratio(best_fitness))

The max possible fitness was 9600.
The best fitness achieved was 9166.
Total runs: 30
Ratio of Successful Runs / Performed Runs:
0_tournament_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]      0.0
1_tournament_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]    0.0
2_fps_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]             0.0
3_fps_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]           0.0
dtype: float64
Max fitness not achieved. Using the same ratio for best fitness.
0_tournament_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]      0.03
1_tournament_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]    0.03
2_fps_pmx[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]             0.00
3_fps_cycle[1.0]_swap[0.4,1]_rot[0.25]_adpt[0.5]           0.00
dtype: float64


In [25]:
# Collect, per configuration, the individuals that reached the best fitness.
# Cells that did not reach it are blanked out by the mask ...
_reached_best = best_ind_df_fit == best_fitness
best_inds = best_ind_df[_reached_best]

# ... and then dropped, leaving one plain list per configuration.
best_inds = best_inds.apply(lambda column: column.dropna().to_list())

---
#### Visualize an example of the best individual.

In [42]:
# Any individual with the best fitness is as good as any other, so take the first one
# found and stop searching. A plain `break` would only leave the inner loop and the
# pick would keep being overwritten by later configurations.
# The fitness is cast to int because best_fitness was computed from int-cast fitness values.
best_ind = next(
    individual
    for individuals in best_individuals.values()
    for individual in individuals
    if int(individual.fitness) == best_fitness
)

grid, pieces_coordinates = tetrimino_fitter(best_ind.representation, grid_shape)

# Create the output folder if needed and use one shared base name, so the HTML and PNG
# exports of the same figure carry the same timestamp.
os.makedirs("graphs", exist_ok=True)
best_rep_name = f"graphs/best_rep_{time.time()}"

plot_grid(
    ind_fitness=best_ind.fitness,
    grid=grid,
    pieces_coordinates=pieces_coordinates,
    save_html_name=best_rep_name,
    save_png_name=best_rep_name,
    marker_size=60,
    width=800,
    height=600,
)

---
#### Information Logging

In [27]:
# Cell for logging all the information.
# Writes the grid, the pieces, the universal parameters and every configuration's
# parameters to a timestamped text file.
os.makedirs("configs", exist_ok=True)  # make sure the target folder exists
filename = f"configs/parameters_{timestamp}.txt"

toFile = (
    f"grid_shape = {grid_shape}\n"
    f"pieces_list = {pieces_list}\n\n"
    "Universal Parameters\n"
    f"pop_size: {pop_size}\n"
    f"n_elites: {n_elites}\n"
    f"iterations: {iterations}\n"
    f"generations: {generations}\n"
    "\nConfig Parameters:\n"
)

for GA_config in GA_configs_list:
    # Each parameter is written as "key: value, " (trailing separator kept on purpose
    # so the file format stays the same as in earlier logs).
    params_text = "".join(
        f"{param}: {value}, " for param, value in GA_config["params"].items()
    )
    toFile += f"Name: {GA_config['name']}\n{{{params_text}}}\n\n"

# The context manager closes the file even if the write fails.
with open(filename, "a+") as file1:
    file1.write(toFile)

In [28]:
# Cell for logging the SR information.
# Writes the same Successful Runs / Performed Runs report that was printed above,
# plus one representative best individual, to a timestamped text file.
os.makedirs("configs", exist_ok=True)  # make sure the target folder exists
filename = f"configs/SR_ratio_{timestamp}.txt"

toFile = "SR Ratio Results\n\n"
toFile += f"The max possible fitness was {max_fitness}.\nThe best fitness achieved was {best_fitness}.\n"
toFile += f"Total runs: {iterations}\nRatio of Successful Runs / Performed Runs:\n"
# Masking leaves NaN where the fitness differs; count() skips NaN, giving matches per configuration.
toFile += np.round(best_ind_df[best_ind_df_fit == max_fitness].count() / iterations, 2).to_string()

if max_fitness != best_fitness:
    toFile += "\nMax fitness not achieved. Using the same ratio for best fitness.\n"
    toFile += np.round(best_ind_df[best_ind_df_fit == best_fitness].count() / iterations, 2).to_string()

toFile += f"\nOne representative individual of best fitness is: {best_ind.representation}, with {best_ind.fitness} fitness."

# The context manager closes the file even if the write fails.
with open(filename, "a+") as file2:
    file2.write(toFile)

In [17]:
# Once the run has finished, move all files out of the `0` directory, together with the plotly graphs in the `graphs` folder.