In [3]:
from turtle import width
import numpy as np
import pandas as pd
import shap
import joblib
import os
from sklearn.model_selection import LeaveOneGroupOut
import shap
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def get_unique_values_for_module(df, module):
    """Return the sorted distinct values stored in column ``module`` of ``df``.

    Args:
        df: Table indexable by column name (a pandas DataFrame in this file).
        module: Name of the column whose distinct values are wanted.

    Returns:
        A NumPy array with each distinct value once, in sorted order.
    """
    module_column = df[module]
    return np.unique(module_column)

def generate_column_names(unique_values, budgets, dim):
    """Build ``"<value>-<budget/dim>d"`` labels for every budget/value pair.

    Labels are ordered budget-major: all values for the first budget, then
    all values for the second budget, and so on.

    Args:
        unique_values: Values to label (converted with ``str``).
        budgets: Absolute budgets; each is divided by ``dim`` and truncated
            to an int before being written into the label.
        dim: Divisor applied to every budget.

    Returns:
        List of label strings of length ``len(budgets) * len(unique_values)``.
    """
    return [
        str(value) + "-" + str(int(budget / dim)) + 'd'
        for budget in budgets
        for value in unique_values
    ]

def get_most_important_features(df_shap, conf, budget, dim, num_features, ELA_features):
    """Return the names of the features with the largest absolute values.

    The row of ``df_shap`` whose index equals ``"<conf>_<budget>_<dim>"`` is
    selected (the first one if several match), its first three columns are
    dropped, and the remaining values are paired positionally with
    ``ELA_features`` and ranked by absolute value, largest first.

    Args:
        df_shap: DataFrame indexed by ``"<conf>_<budget>_<dim>"`` strings.
        conf: Configuration identifier used in the row key.
        budget: Budget used in the row key.
        dim: Dimension used in the row key.
        num_features: How many top-ranked feature names to return.
        ELA_features: Feature names, in the same order as the value columns
            that follow the first three columns of ``df_shap``.

    Returns:
        NumPy array with at most ``num_features`` feature names.

    Raises:
        IndexError: If no row of ``df_shap`` has the requested key.
    """
    row_key = f'{conf}_{budget}_{dim}'
    matching_rows = df_shap[df_shap.index == row_key]
    selected_row = matching_rows.iloc[0]

    # The first three columns are skipped; presumably they hold non-feature
    # metadata -- TODO confirm against the layout of the input file.
    magnitudes = np.abs(selected_row[3:].tolist())

    ranking = pd.DataFrame(
        list(zip(ELA_features, magnitudes)),
        columns=['col_name', 'feature_importance_vals'],
    )
    ranking = ranking.sort_values(by=['feature_importance_vals'], ascending=False)
    return np.array(ranking['col_name'])[0:num_features]

# Algorithm family under analysis; anything other than "modCMA" selects the
# second set of modules and configuration count below.
modAlgo = "modCMA"

# Dimensions for which frequency tables are built.
dims = [5, 30]

# Module (column) names of the configuration grid and the number of
# configurations to iterate over, both chosen by algorithm family.
if modAlgo == "modCMA":
    modules = [
        'elitist',
        'mirrored',
        'base_sampler',
        'weights_option',
        'local_restart',
        'step_size_adaptation',
    ]
    conf_num = 324
else:
    modules = [
        'mutation_base',
        'mutation_reference',
        'mutation_n_comps',
        'use_archive',
        'crossover',
        'adaptation_method',
        'lpsr',
    ]
    conf_num = 576

# Feature names, in the column order used by the frequency tables.
features = [
    'diff_mean_02', 'diff_mean_05', 'diff_mean_10', 'diff_mean_25',
    'diff_median_02', 'diff_median_05', 'diff_median_10', 'diff_median_25',
    'dist_ratio.coeff_var',
    'eps.max', 'eps.ratio', 'eps.s',
    'expl_var.cor_init', 'expl_var.cor_x', 'expl_var.cov_init', 'expl_var.cov_x',
    'expl_var_PC1.cor_init', 'expl_var_PC1.cor_x', 'expl_var_PC1.cov_init', 'expl_var_PC1.cov_x',
    'h.max',
    'kurtosis',
    'lin_simple.adj_r2', 'lin_simple.coef.max', 'lin_simple.coef.max_by_min',
    'lin_simple.coef.min', 'lin_simple.intercept',
    'lin_w_interact.adj_r2',
    'm0',
    'nb_fitness.cor',
    'nn_nb.cor', 'nn_nb.mean_ratio', 'nn_nb.sd_ratio',
    'number_of_peaks',
    'quad_simple.adj_r2', 'quad_simple.cond', 'quad_w_interact.adj_r2',
    'ratio_mean_02', 'ratio_mean_05', 'ratio_mean_10', 'ratio_mean_25',
    'ratio_median_02', 'ratio_median_05', 'ratio_median_10', 'ratio_median_25',
    'skewness',
]


# Configuration grid for the selected algorithm. Missing cells are replaced
# by the literal string 'None' (presumably so that they can be used as
# ordinary labels later on -- TODO confirm).
grid_path = f'../data/raw_data/{modAlgo}_conf_grid.csv'
df_grid = pd.read_csv(grid_path, index_col=0).replace({np.nan: 'None'})

# Merged per-configuration values read from shapley_train.csv; the first CSV
# column becomes the index.
shap_path = f'../results/{modAlgo}/shapley_merged/shapley_train.csv'
df_shap = pd.read_csv(shap_path, index_col=0)
# For every module, count how often each feature appears among the top-k
# features (ranked by absolute value in df_shap) of the configurations that
# share a module setting and budget. One frequency table is written per
# (top-k, dimension) pair and all tables of a module are drawn as stacked
# heatmaps in a single PDF.

# Values of k for the "top-k features" rankings; one heatmap row is drawn
# for every (k, dimension) combination.
top_feature_counts = [10, 15, 20]

for module in modules:
    print(module)
    module_options = get_unique_values_for_module(df_grid, module)

    # The module setting of a configuration does not depend on k, dimension
    # or budget, so it is looked up once per module instead of in the
    # innermost loop.
    conf_mod_statuses = [str(df_grid.iloc[conf][module]) for conf in range(conf_num)]

    fig = make_subplots(
        rows=len(top_feature_counts) * len(dims),
        cols=1,
        vertical_spacing=0.015,
    )
    row = 1
    for top_features in top_feature_counts:
        for dim in dims:
            budgets = [50*dim, 100*dim, 300*dim, 500*dim, 1000*dim, 1500*dim]
            data_columns = generate_column_names(module_options, budgets, dim)
            # One table row per (budget, module option) label and one column
            # per feature; sized from the actual lists rather than a literal.
            data = np.zeros((len(data_columns), len(features)))

            for budget in budgets:
                # Same "<value>-<budget/dim>d" scheme as generate_column_names.
                budget_suffix = "-" + str(int(budget/dim)) + "d"
                for conf, conf_mod_status in enumerate(conf_mod_statuses):
                    row_idx = data_columns.index(conf_mod_status + budget_suffix)
                    top_features_names = get_most_important_features(
                        df_shap, conf, budget, dim, top_features, features
                    )
                    for feature in top_features_names:
                        data[row_idx, features.index(feature)] += 1

            # Reverse the row order before saving and plotting.
            df_freq = pd.DataFrame(data, index=data_columns, columns=features).iloc[::-1]
            df_freq.to_csv(f"../results/{modAlgo}/frequency_tables/top{top_features}_{module}_dim_{dim}.csv")

            fig.add_trace(
                go.Heatmap(
                    # Feature names are only passed for the dim == 5, top-10
                    # panel; every other panel gets x=None.
                    x=df_freq.columns if (dim == 5 and top_features == 10) else None,
                    y=df_freq.index,
                    z=df_freq,
                    colorscale='greens' if dim == 5 else 'purples',
                    text=df_freq.values,
                    texttemplate="%{text}",
                    textfont={"size": 9},
                ),
                row=row,
                col=1,
            )
            # Label the panel from the loop variables so the titles cannot
            # drift out of sync with the panel order.
            fig.update_yaxes(title_text=f'Dim: {dim} Top: {top_features}', row=row, col=1)
            row = row + 1

    fig.update_traces(dict(showscale=False))
    fig.update_xaxes(side="top")
    fig.update_layout(
        width=1000,
        height=1500,
        margin=dict(l=0, r=0, b=0, t=0),
        font=dict(size=10),
    )

    fig.write_image(f"../figures/ELA_importance/{modAlgo}_{module}.pdf")

elitist
mirrored
base_sampler
weights_option
local_restart
step_size_adaptation


In [10]:
# create plot for the paper
# elitist and mirrored (modCMA, 5D and 30D, top 10), plus mutation_base (modDE, 5D, top 10)

import pandas as pd 
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# Figure for the paper: previously saved frequency tables drawn as stacked
# heatmaps with a shared x axis and written to a single PDF.

# Number of top-ranked features the loaded tables were built for.
top = 10

# Panels from top to bottom: (algorithm, module, dimension, y-axis title).
panels = [
    ('modCMA', 'elitist', 5, 'Elitist, 5D, Top 10'),
    ('modCMA', 'elitist', 30, 'Elitist, 30D, Top 10'),
    ('modCMA', 'mirrored', 5, 'Mirrored, 5D, Top 10'),
    ('modCMA', 'mirrored', 30, 'Mirrored, 30D, Top 10'),
    ('modDE', 'mutation_base', 5, 'Mutation base, 5D, Top 10'),
]

fig = make_subplots(rows=len(panels), cols=1, shared_xaxes=True, vertical_spacing=0.01)
for row, (algo, module, dim, title) in enumerate(panels, start=1):
    df = pd.read_csv(f'../results/{algo}/frequency_tables/top{top}_{module}_dim_{dim}.csv', index_col=0)
    fig.add_trace(
        go.Heatmap(
            x=df.columns,
            y=df.index,
            z=df,
            colorscale='Blues',
            text=df.values,
            texttemplate="%{text}",
            textfont={"size": 9},
        ),
        row=row,
        col=1,
    )
    fig.update_yaxes(title_text=title, row=row, col=1)

fig.update_traces(dict(showscale=False))
fig.update_layout(
    width=1000,
    height=1400,
    margin=dict(l=0, r=0, b=0, t=0),
    font=dict(size=11),
)

fig.show()
fig.write_image("../figures/frequency_table.pdf")
