In [None]:
#! /usr/bin/env python

# PonyGE2
# Copyright (c) 2017 Michael Fenton, James McDermott,
#                    David Fagan, Stefan Forstenlechner,
#                    and Erik Hemberg
# Hereby licensed under the GNU GPL v3.
""" Python GE implementation """

from utilities.algorithm.general import check_python_version

check_python_version()
import matplotlib
matplotlib.use('Qt5Agg')
%matplotlib inline
import matplotlib.pyplot as plt
from stats.stats import get_stats
from algorithm.parameters import params, set_params
import sys
import time
import numpy as np
np.set_printoptions(linewidth=150)
import random
import os
import pickle
import seaborn as sns


from utilities.stats import trackers
from itertools import product
from IPython.display import display



## Find the correspondence between a hyperparameter combination and its index in the results array

In [None]:
def find_hyperparam_combination_index(desired_hyperparam_values, hyperparameters):
    """Return the flat index of a hyperparameter combination.

    Combinations are enumerated in the order produced by
    ``itertools.product`` over each hyperparameter's ``'list'`` entry, so the
    last hyperparameter varies fastest.

    Args:
        desired_hyperparam_values: Mapping from hyperparameter name to the
            value to search for. It must contain an entry for every name
            found in ``hyperparameters``.
        hyperparameters: Sequence of dicts, each holding a ``'name'`` key and
            a ``'list'`` key with that hyperparameter's candidate values.

    Returns:
        The zero-based position of the first combination whose values all
        compare equal (``==``) to the desired ones, or ``-1`` if there is
        no such combination.

    Raises:
        KeyError: If ``desired_hyperparam_values`` lacks one of the names.
    """
    # Look the target values up once instead of re-indexing the mapping and
    # the hyperparameter list for every single combination.
    targets = [desired_hyperparam_values[hp['name']] for hp in hyperparameters]

    # product() is lazy: the search stops at the first match without ever
    # materialising the full grid of combinations.
    combinations = product(*[hp['list'] for hp in hyperparameters])
    for index, combination in enumerate(combinations):
        if all(target == value for target, value in zip(targets, combination)):
            return index
    return -1  # No combination matches the desired values

# Example query: each hyperparameter name mapped to the value to look up
desired_hyperparam_values = dict(
    MUTATION_PROBABILITY=0.2,
    ANOTHER_HYPERPARAMETER=0.5,
)

In [None]:
import numpy as np
from itertools import product

def reshape_test_results(test_results, seeds, hyperparameters):
    """Unflatten per-seed results into one axis per hyperparameter.

    Column ``i`` of ``test_results`` ends up at position
    ``np.unravel_index(i, grid_shape)`` of the output (C order, so the last
    hyperparameter varies fastest), where ``grid_shape`` holds the length of
    each hyperparameter's ``'list'``.

    Args:
        test_results: 2-D NumPy array indexed as
            ``[seed_index, combination_index]``.
        seeds: Sequence whose length sets the size of the first output axis.
        hyperparameters: Sequence of dicts, each with a ``'list'`` key
            holding that hyperparameter's candidate values.

    Returns:
        A float64 array of shape ``(len(seeds), len(list_0), ..., len(list_k))``.
        Grid cells with no matching column in ``test_results`` stay at 0.

    Raises:
        ValueError: If ``test_results`` has more columns than the grid has
            cells.
        IndexError: If ``test_results`` has fewer rows than there are seeds.
    """
    grid_shape = tuple(len(hp['list']) for hp in hyperparameters)
    n_seeds = len(seeds)
    n_cells = int(np.prod(grid_shape, dtype=np.intp))  # product of () is 1
    n_rows, n_columns = test_results.shape[0], test_results.shape[1]

    if n_seeds and n_columns > n_cells:
        raise ValueError(
            f"test_results has {n_columns} columns but the hyperparameter "
            f"grid {grid_shape} only has {n_cells} cells"
        )
    if n_columns and n_rows < n_seeds:
        raise IndexError(
            f"test_results has {n_rows} rows but {n_seeds} seeds were given"
        )

    reshaped_test_results = np.zeros((n_seeds,) + grid_shape)

    # A freshly allocated array is C-contiguous, so this reshape is a view:
    # writing through it fills reshaped_test_results in unravel_index order.
    flat_view = reshaped_test_results.reshape(n_seeds, n_cells)
    n_fill = min(n_columns, n_cells)
    if n_fill:
        flat_view[:, :n_fill] = test_results[:n_seeds, :n_fill]

    return reshaped_test_results

In [None]:

# Test results of every cross-validation fold, in fold order
array_of_results = []

for i in range(5):
    file_path = f"./results_data/results_data_MUT_CROSS_TOURNSIZE_fold_{i + 1}.pkl"

    # Deserialize the results dictionary stored for this fold
    with open(file_path, 'rb') as f:
        loaded_data = pickle.load(f)

    # Unpack every stored entry. These names stay in the notebook namespace,
    # so after the loop they hold the values of the last fold that was read.
    hyperparameters = loaded_data['hyperparameters']
    seeds = loaded_data['seeds']
    test_results = loaded_data['test_results']
    train_results = loaded_data['train_results']
    test_evolution = loaded_data['test_evolution']
    train_evolution = loaded_data['train_evolution']

    array_of_results += [test_results]

# Quick sanity check: report the shape of each fold's result array
for fold_results in array_of_results:
    print(fold_results.shape)

## Reshape and compute the average of the values across the seeds

In [None]:
# One entry per fold: the hyperparameter grid of test results, averaged over
# the leading axis produced by reshape_test_results.
reshaped_results_array = []
for fold_results in array_of_results:
    per_seed_grid = reshape_test_results(fold_results, seeds, hyperparameters)
    seed_mean_grid = np.mean(per_seed_grid, axis=0)
    reshaped_results_array.append(seed_mean_grid)
    print(seed_mean_grid.shape)


In [None]:
# One entry per fold: the hyperparameter grid of test results, averaged over
# the leading axis produced by reshape_test_results.
reshaped_results_array = []
for i in range(len(array_of_results)):
    per_seed_grid = reshape_test_results(array_of_results[i], seeds, hyperparameters)
    reshaped_results_array.append(np.mean(per_seed_grid, axis=0))
    # print("Mean and std of fold ", str(i+1))
    # print(np.mean(reshaped_results_array[i]))
    # print(np.std(reshaped_results_array[i]))

# Draw every fold in a single boxplot call so each box gets its own x
# position. Calling plt.boxplot once per fold on the same axes places every
# box at x = 1, where they overlap and cannot be told apart.
flat_fold_values = [fold_grid.flatten() for fold_grid in reshaped_results_array]
if flat_fold_values:
    fold_positions = range(1, len(flat_fold_values) + 1)
    plt.boxplot(flat_fold_values)
    plt.xticks(fold_positions, [f"Fold {k}" for k in fold_positions])
    plt.ylabel("Mean test result")
    plt.title("Distribution of mean test results per fold")
    plt.show()


## Mean across different folds

In [None]:
# Stack the per-fold grids along a new leading axis (index 0 selects the fold)
folds = np.array(reshaped_results_array)

# Collapse the fold axis to get one mean and one std per grid cell
mean_across_folds = folds.mean(axis=0)
std_across_folds = folds.std(axis=0)

for fold_statistic in (mean_across_folds, std_across_folds):
    print(fold_statistic.shape)

In [None]:
# Disabled cell: everything below is a bare triple-quoted string, so none of
# the plotly code inside it is executed. As the only expression in the cell,
# the string itself is what the cell evaluates to.
"""import plotly.graph_objs as go
import numpy as np

# Assuming you have a (5, 5, 5) shaped array of mean test results
# Replace with your actual mean_test_results array

mean_test_results = mean_across_folds

# Prepare the data for the 3D scatter plot
x_values = []
y_values = []
z_values = []
mean_values = []

for i, hyperparam1 in enumerate(hyperparameters[0]['list']):
    for j, hyperparam2 in enumerate(hyperparameters[1]['list']):
        for k, hyperparam3 in enumerate(hyperparameters[2]['list']):
            x_values.append(hyperparam1)
            y_values.append(hyperparam2)
            z_values.append(hyperparam3)
            mean_values.append(mean_test_results[i, j, k])

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=x_values,
    y=y_values,
    z=z_values,
    mode='markers',
    marker=dict(
        size=6,
        color=mean_values,                # set color to an array/list of desired values
        colorscale='Viridis',             # choose a colorscale
        opacity=0.8,
        showscale=True,
        colorbar=dict(title="Mean Test Result")
    ),
    text=mean_values
)])

# Customize the plot
fig.update_layout(scene=dict(xaxis_title=hyperparameters[0]['name'],
                             yaxis_title=hyperparameters[1]['name'],
                             zaxis_title=hyperparameters[2]['name']),
                  margin=dict(l=0, r=0, b=0, t=0))

# Show the plot
fig.show()
"""

In [None]:
import pandas as pd
import plotly.express as px

# Flatten the per-combination statistics to 1-D (C order), so entry n lines
# up with the n-th hyperparameter combination generated below.
mean_flat = mean_across_folds.flatten()
std_flat = std_across_folds.flatten()

# Expand the three value lists into aligned coordinate columns. With
# indexing='ij' the first hyperparameter varies slowest and the third
# fastest, which is the same ordering as the flattened statistics.
hp_x, hp_y, hp_z = hyperparameters[0], hyperparameters[1], hyperparameters[2]
grid_x, grid_y, grid_z = np.meshgrid(
    hp_x['list'], hp_y['list'], hp_z['list'], indexing='ij'
)

# One row per combination: three coordinates plus the mean and std of the result
data = {
    hp_x['name']: grid_x.ravel(),
    hp_y['name']: grid_y.ravel(),
    hp_z['name']: grid_z.ravel(),
    'mean_test_result': mean_flat,
    'std_dev': std_flat,
}
df = pd.DataFrame(data)

# 3D scatter: colour encodes the mean result, marker size its std deviation
axis_columns = {'x': hp_x['name'], 'y': hp_y['name'], 'z': hp_z['name']}
fig = px.scatter_3d(
    df,
    color='mean_test_result',
    size='std_dev',
    size_max=50,
    opacity=0.7,
    **axis_columns,
)

# Label the axes with the hyperparameter names and remove the outer margins
fig.update_layout(
    scene={
        'xaxis_title': hp_x['name'],
        'yaxis_title': hp_y['name'],
        'zaxis_title': hp_z['name'],
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
)

fig.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

mean_array = mean_across_folds

hp_names = [hp['name'] for hp in hyperparameters]
hp_values = [hp['list'] for hp in hyperparameters]

# Long format: one record per grid cell, holding the hyperparameter values of
# that cell followed by its mean test result. np.ndindex walks the grid in C
# order, so this works for any number of hyperparameters.
data = []
for grid_index in np.ndindex(*(len(values) for values in hp_values)):
    combination = [hp_values[axis][pos] for axis, pos in enumerate(grid_index)]
    data.append(combination + [mean_array[grid_index]])

# Build the frame from the records above so this cell does not depend on a
# DataFrame that happens to be left over from another cell.
df_long = pd.DataFrame(data, columns=hp_names + ['mean_test_result'])

# One column of plots per hyperparameter; squeeze=False keeps `axes` 2-D even
# when there is a single hyperparameter.
fig, axes = plt.subplots(2, len(hp_names), figsize=(20, 12), squeeze=False)

# Row 0 holds the boxplots, row 1 the violin plots
plot_rows = (
    (sns.boxplot, "Boxplot of Mean Test Result vs"),
    (sns.violinplot, "Violin Plot of Mean Test Result vs"),
)
for row, (plot_fn, title_prefix) in enumerate(plot_rows):
    for col, name in enumerate(hp_names):
        ax = axes[row, col]
        plot_fn(x=name, y='mean_test_result', data=df_long, ax=ax)
        ax.set_title(f"{title_prefix} {name}")
        # Tick labels are left to seaborn: it labels each category with its
        # own value, whereas overwriting them with the raw value list would
        # mislabel the boxes whenever that list is not in the plotted order.

plt.tight_layout()
plt.show()


In [None]:
# Pairwise correlation between every column of df
correlation_matrix = df.corr()

# Annotated heatmap on a diverging palette centred at zero correlation
corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=corr_ax)
corr_ax.set_title('Correlation Matrix of Hyperparameters and Mean Test Result')
plt.show()


In [None]:
# For every fold, plot the train and test learning curves of each value of
# the single hyperparameter, averaged over axis 0 of the stored arrays.
for i in range(5):
    file_path = f"./results_data/results_data_fold_{i + 1}.pkl"

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only open result files produced by trusted runs.
    with open(file_path, 'rb') as f:
        loaded_data = pickle.load(f)

    # Access the loaded data
    hyperparam = loaded_data['hyperparam']
    hyperparam_list = hyperparam['list']
    seeds = loaded_data['seeds']
    test_results = loaded_data['test_results']
    train_results = loaded_data['train_results']
    test_evolution = loaded_data['test_evolution']
    train_evolution = loaded_data['train_evolution']

    # The averages do not depend on j, so compute them once per fold
    avg_train_evolution = np.mean(train_evolution, axis=0)
    avg_train_evoltion = avg_train_evolution  # misspelled name kept as an alias
    avg_test_evolution = np.mean(test_evolution, axis=0)

    print("VALUES FOR FOLD", i + 1)
    # presumably axis 0 indexes the seeds — TODO confirm. No single seed is
    # reported per curve: j indexes hyperparameter values, so seeds[j] would
    # be unrelated to the curve and can run past the end of `seeds`.
    print("Curves averaged over", test_evolution.shape[0], "runs")

    for j in range(test_evolution.shape[1]):
        print("MUT_Value", hyperparam_list[j])
        plt.plot(avg_train_evolution[j], label="Train Performances")
        plt.plot(avg_test_evolution[j], label="Test Performances")
        plt.legend()

        plt.xlabel("Generations")
        plt.ylabel("Accuracy")
        plt.title(f"Fold {i + 1}, MUT_Value {hyperparam_list[j]}")

        plt.show()


In [None]:
matplotlib.use('Qt5Agg')
%matplotlib inline


# Generate mutation values
mean = np.mean(test_results, axis = 0)
std = np.std(test_results, axis = 0)

# Plot mean and standard deviation
plt.plot(hyperparam_list, mean, label="Mean")
plt.fill_between(hyperparam_list, mean - std, mean + std, alpha=0.3, label="Std")

# Set axis labels and title
plt.xlabel("Mutation Values", fontsize=16)
plt.ylabel("Fitness Values", fontsize=16)
plt.title("Mean and Standard Deviation", fontsize=16)

# Set xticks to display actual values of mutation_values rounded to 2 decimal places
rounded_x = np.round(hyperparam_list, 2)
plt.xticks(hyperparam_list, rounded_x, fontsize = 12)

# Add legend and display the plot
plt.legend()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# test_results is assumed to be 2-D with shape (num_seeds, num_hyperparams)

# Mean performance along axis 0: one value per column of test_results
avg_test_results = np.mean(test_results, axis=0)

# Apply the whitegrid theme before the figure is created
sns.set(style='whitegrid')
box_fig, box_ax = plt.subplots(figsize=(10, 6))

# One box per column of test_results
sns.boxplot(data=test_results, ax=box_ax)

# Labels and title
box_ax.set_xlabel('Hyperparameter Combination')
box_ax.set_ylabel('Performance')
box_ax.set_title('Box Plot of Test Results for Different Hyperparameter Combinations')

plt.show()


In [None]:
import matplotlib.pyplot as plt

# avg_test_results is assumed to be 1-D with one entry per value in
# hyperparam_list, which supplies the x coordinates.

# Scatter the averaged results against the hyperparameter values
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(hyperparam_list, avg_test_results)

# Labels and title
scatter_ax.set_xlabel('Hyperparameter')
scatter_ax.set_ylabel('Performance')
scatter_ax.set_title('Scatter Plot of Test Results for Different Hyperparameter Values')

plt.show()


In [None]:
# Same data as a connected line with a circular marker at every point
line_fig, line_ax = plt.subplots()
line_ax.plot(hyperparam_list, avg_test_results, marker='o')

# Labels and title
line_ax.set_xlabel('Hyperparameter')
line_ax.set_ylabel('Performance')
line_ax.set_title('Line Plot of Test Results for Different Hyperparameter Values')

plt.show()


In [None]:

# Shape of the raw training curves, then of their mean over axis 0
print(train_evolution.shape)
agg_train_evolution = np.mean(train_evolution, axis=0)
print(agg_train_evolution.shape)

# Averaged curve at index 0, printed for inspection
print(agg_train_evolution[0])

# Averaged curve at index 1, plotted
curve_fig, curve_ax = plt.subplots()
curve_ax.plot(agg_train_evolution[1])
plt.show()