In [6]:
import pandas as pd
import numpy as np
import plotly.figure_factory as ff


def modCMA():
    """Plot perf2vec value distributions per modCMA module setting.

    Reads ``../data/processed_data/conf_perf2vec.csv`` and
    ``../data/raw_data/dt_grid_v2.csv``. For every (module, dimension,
    budget) combination it:

    1. keeps the rows whose index matches ``<anything>_<budget>_<dim>``,
    2. re-indexes them by the integer before the first underscore
       (presumably the configuration id used as the grid file's index --
       verify against the data),
    3. joins the module's column from the grid file,
    4. splits the rows by the module's value, takes the per-row mean of the
       remaining columns and transforms it with ``log10(x + 1)``,
    5. draws one distribution curve per module value, shows the figure and
       writes it to ``../figures/distribution/perf2vec/``.
    """
    df = pd.read_csv('../data/processed_data/conf_perf2vec.csv', index_col=0)
    df_grid = pd.read_csv('../data/raw_data/dt_grid_v2.csv', index_col=0)

    modules = [
        'elitist',
        'mirrored',
        'base_sampler',
        'weights_option',
        'local_restart',
        'step_size_adaptation',
    ]

    for module in modules:
        for dim in [5, 30]:
            for budget in [500, 2000, 5000, 10000, 50000]:
                # The trailing \b stops e.g. "_5" from matching "_50".
                variable = f'.*_{budget}_{dim}'
                df_sub = df[df.index.str.contains(fr'\b{variable}\b', regex=True)]
                df_sub.index = [int(i.split("_")[0]) for i in df_sub.index]

                df_sub = df_sub.join(df_grid[module])
                # Missing module values become their own 'None' category.
                df_sub = df_sub.replace({np.nan: 'None'})
                unique_module_values = np.unique(df_sub[module])

                hist_data = []
                for unique_module_value in unique_module_values:
                    df_sub_m = df_sub[df_sub[module] == unique_module_value]
                    df_sub_m = df_sub_m.drop([module], axis=1)
                    distr = df_sub_m.mean(axis=1).values
                    # log10(x + 1) keeps zero means finite.
                    hist_data.append([np.log10(d + 1) for d in distr])

                labels = [str(value) for value in unique_module_values]

                # NOTE(review): the original call read
                # `show_hist= show_rug=False`, which is a syntax error.
                # `show_hist=False` is assumed to be the intent -- confirm,
                # or drop the argument to keep plotly's default as modDE does.
                fig = ff.create_distplot(
                    hist_data,
                    labels,
                    bin_size=0.05,
                    colors=['#636efa', '#ef553b', '#00cc96'],
                    show_hist=False,
                    show_rug=False,
                )

                fig.update_layout(
                    title=dict(
                        text='Distribution Plot',
                        font=dict(
                            size=40
                        ),
                        x=0.5
                    ),
                    plot_bgcolor='#ffffff',
                    paper_bgcolor='#ffffff',
                    autosize=False,
                    width=900,
                    height=700,
                    margin=dict(
                        l=0,
                        r=0,
                        b=30,
                        t=0,
                    ),
                    xaxis=dict(
                        title='X Axis Title',
                        title_font=dict(size=30),
                        tickfont=dict(size=20),
                        showgrid=True
                    ),
                    yaxis=dict(
                        title='Y Axis Title',
                        title_font=dict(size=30),
                        tickfont=dict(size=20),
                        showgrid=True
                    ),
                    legend=dict(
                        yanchor='top',
                        y=0.99,
                        xanchor='left',
                        x=0.01,
                        font=dict(
                            size=20
                        )
                    ),
                    showlegend=True
                )

                print(f'{module}-{dim}D-{budget}')

                fig.show()
                fig.write_image(f'../figures/distribution/perf2vec/perf2vec-{module}-{dim}D-{budget}.png')


def modDE():
    """Plot perf2vec value distributions per modDE module setting.

    Loads ``../data/processed_data_modDE/conf_perf2vec.csv`` and
    ``../data/raw_data/modDE_dt_grid.csv``. For each module, dimension and
    budget, the rows whose index matches ``<anything>_<budget>_<dim>`` are
    selected, re-indexed by the integer before the first underscore, joined
    with the module's column from the grid file and split by module value.
    The per-row mean of the remaining columns, transformed with
    ``log10(x + 1)``, is drawn as one distribution per module value. Each
    figure is shown and written to
    ``../figures/distribution_modDE/perf2vec/``.
    """
    perf = pd.read_csv('../data/processed_data_modDE/conf_perf2vec.csv', index_col=0)
    grid = pd.read_csv('../data/raw_data/modDE_dt_grid.csv', index_col=0)

    module_names = [
        'mutation_base',
        'mutation_reference',
        'mutation_n_comps',
        'use_archive',
        'crossover',
        'adaptation_method',
        'lpsr',
    ]

    for module in module_names:
        for dim in [5, 30]:
            for budget in [500, 2000, 5000, 10000, 50000]:
                # Word boundaries keep e.g. "_5" from also matching "_50".
                pattern = fr'\b.*_{budget}_{dim}\b'
                selected = perf[perf.index.str.contains(pattern, regex=True)]
                selected.index = [int(key.split("_")[0]) for key in selected.index]

                # Attach the module setting; missing settings become 'None'.
                selected = selected.join(grid[module])
                selected = selected.replace({np.nan: 'None'})
                settings = np.unique(selected[module])

                # One frame per setting, with the setting column removed.
                per_setting = [
                    selected[selected[module] == setting].drop([module], axis=1)
                    for setting in settings
                ]
                hist_data = [
                    [np.log10(row_mean + 1) for row_mean in frame.mean(axis=1).values]
                    for frame in per_setting
                ]
                labels = [str(setting) for setting in settings]

                fig = ff.create_distplot(
                    hist_data,
                    labels,
                    bin_size=0.05,
                    colors=['#636efa', '#ef553b', '#00cc96'],
                    show_rug=False,
                )
                fig.update_layout(
                    title_text='',
                    title_x=0.5,
                    autosize=False,
                    width=900,
                    height=700,
                    margin=dict(l=0, r=0, b=30, t=0),
                    font=dict(size=35),
                    legend=dict(yanchor="top", orientation="h", x=0, y=1.1),
                    showlegend=True,
                )

                print(f'{module}-{dim}D-{budget}')

                fig.show()
                fig.write_image(f'../figures/distribution_modDE/perf2vec/perf2vec-{module}-{dim}D-{budget}.png')
# Run the modDE analysis; modCMA is defined above but is not called in this cell.
modDE()



mutation_base-5D-500
mutation_base-5D-2000
mutation_base-5D-5000
mutation_base-5D-10000
mutation_base-5D-50000
mutation_base-30D-500
mutation_base-30D-2000
mutation_base-30D-5000
mutation_base-30D-10000
mutation_base-30D-50000
mutation_reference-5D-500
mutation_reference-5D-2000
mutation_reference-5D-5000
mutation_reference-5D-10000
mutation_reference-5D-50000
mutation_reference-30D-500
mutation_reference-30D-2000
mutation_reference-30D-5000
mutation_reference-30D-10000
mutation_reference-30D-50000
mutation_n_comps-5D-500
mutation_n_comps-5D-2000
mutation_n_comps-5D-5000
mutation_n_comps-5D-10000
mutation_n_comps-5D-50000
mutation_n_comps-30D-500
mutation_n_comps-30D-2000
mutation_n_comps-30D-5000
mutation_n_comps-30D-10000
mutation_n_comps-30D-50000
use_archive-5D-500
use_archive-5D-2000
use_archive-5D-5000
use_archive-5D-10000
use_archive-5D-50000
use_archive-30D-500
use_archive-30D-2000
use_archive-30D-5000
use_archive-30D-10000
use_archive-30D-50000
crossover-5D-500
crossover-5D-20