In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools

In [2]:
from sklearn.metrics import auc
import seaborn as sns

In [12]:
import re

In [4]:
# Exponents of the ECDF targets, from 10^2 down to 10^-8.
target_pow = list(range(2, -9, -1))

# Set of target values for the ECDF calculation.
targets = [10**exponent for exponent in target_pow]

# Budget values at which the ECDF will be calculated.
# Set these according to the csv files of original target samples obtained from iohanalyzer.
evals = [
    1, 223, 445, 668, 890, 1112, 1334, 1557,
    1779, 2001, 2223, 2445, 2668, 2890, 3112, 3334,
    3557, 3779, 4001, 4223, 4445, 4668, 4890, 5001,
]

# Names of the algorithms under comparison.
algorithms = [
    'CMA',
    'DiagonalCMA',
    'PSO',
    'NGO',
    'Shiwa',
    'EDA',
    'NelderMead',
    'NaiveIsoEMNA',
    'DE',
]

In [5]:
#all possible combinations to be run (simulated) in parallel
def algo_combination(algorithms=algorithms):
    """Return each unordered pair of distinct algorithms exactly once.

    Walks the full cross product of ``algorithms`` with itself, drops pairs
    whose two members compare equal, and keeps a pair only if neither it nor
    its mirror image has been kept already. Pairs are returned as 2-tuples in
    the order in which they are first met in the cross product.

    Parameters
    ----------
    algorithms : sequence
        Algorithm identifiers; defaults to the module-level ``algorithms``
        list as it was when this function was defined.

    Returns
    -------
    list of tuple
        The retained ``(first, second)`` pairs.
    """
    unique_pairs = []
    for first, second in itertools.product(algorithms, algorithms):
        if first == second:
            # an algorithm is never paired with itself
            continue
        already_seen = (first, second) in unique_pairs or (second, first) in unique_pairs
        if not already_seen:
            unique_pairs.append((first, second))
    return unique_pairs

In [6]:
# Function ids 25..43 (the stop value 44 of np.arange is excluded).
functions = [func_id for func_id in np.arange(25, 44)]

In [8]:
# Result table for single algorithms, single set of 25 runs for each algo:
# one row per function id, one column per algorithm, initialised to 0.0.
# Built directly as a float frame instead of creating an all-NaN object frame and
# then calling fillna(0)/astype(float), which relies on pandas' deprecated
# implicit downcasting of object columns.
df_single_set = pd.DataFrame(0.0, index=functions, columns=algorithms)
#df_single_set.head()

In [9]:
#average ecdf auc calculation for single algorithms, single set of 25 runs for each algo
def single_set_auc(func, algo, targets=targets, evals=evals):
    """Compute the area under the target-averaged ECDF curve of one algorithm on one function.

    Reads ``filepath/FVSample-10DF<func>.csv`` (csv files of original target
    samples for each function, obtained by uploading logger data to
    iohanalyzer). For every budget in ``evals`` the single row with
    ``ID == algo`` and ``runtime == budget`` is looked up; the values in
    column positions 4..28 of that row are taken as the 25 runs. The ECDF
    point for a budget is the fraction of runs whose value is ``<= target``,
    averaged over all ``targets``. The area under the resulting curve
    (``sklearn.metrics.auc`` over ``evals``) is written to the module-level
    ``df_single_set`` at ``[func, algo]``.

    Parameters
    ----------
    func : function id, used to build the csv file name and as the row label.
    algo : algorithm name, matched against the ``ID`` column and used as the column label.
    targets : iterable of target values (defaults to the module-level ``targets``).
    evals : sequence of budgets (defaults to the module-level ``evals``).

    Returns
    -------
    None. The result is stored in ``df_single_set``.

    Raises
    ------
    ValueError
        If the file does not contain exactly one row for ``algo`` at a budget.
    IndexError
        If the file has fewer than 29 columns.
    """
    df = pd.read_csv(r'filepath/FVSample-10DF'+str(func)+'.csv')

    run_cols = list(range(4, 29))     #25 runs
    n_runs = len(run_cols)
    # Filter on the algorithm once instead of once per (budget, target, run).
    algo_rows = df.loc[df['ID'] == algo]

    pts_algo = []
    for feval in evals:
        row = algo_rows.loc[algo_rows['runtime'] == feval]
        if len(row) != 1:
            raise ValueError(
                'expected exactly one row for ID=' + repr(algo) + ' at runtime=' + repr(feval)
                + ' in the file for function ' + str(func) + ', found ' + str(len(row))
            )
        run_values = [float(value) for value in row.iloc[0, run_cols]]

        tar_tot = 0
        for target in targets:
            hits = sum(1 for value in run_values if value <= target)
            tar_tot += hits / n_runs   #average over 25 runs

        pts_algo.append(tar_tot / len(targets))    #average over set of targets

    area = auc(evals, pts_algo)
    # Single .loc call: the chained form df.loc[func][algo] = ... assigns into a
    # temporary Series and is silently lost when pandas Copy-on-Write is active.
    df_single_set.loc[func, algo] = area

In [10]:
# Result table for single algorithms, better values of the two sets of runs for each algo:
# one row per function id, one column per algorithm, initialised to 0.0.
# Built directly as a float frame instead of creating an all-NaN object frame and
# then calling fillna(0)/astype(float), which relies on pandas' deprecated
# implicit downcasting of object columns.
df_single_algo = pd.DataFrame(0.0, index=functions, columns=algorithms)

In [11]:
#to calculate combined auc for set1, set2 of the same algorithm (better values out of the two sets)
def set1_set2_auc(func, algo, targets=targets, evals=evals):
    """Store the larger of the two ECDF areas obtained from two sets of runs of one algorithm.

    Reads ``filepath1/FVSample-10DF<func>.csv`` (set-1) and
    ``filepath2/FVSample-10DF<func>.csv`` (set-2). For each set, and for every
    budget in ``evals``, the single row with ``ID == algo`` and
    ``runtime == budget`` is looked up and the values in column positions
    4..28 are taken as the 25 runs. The ECDF point for a budget is the
    fraction of runs with value ``<= target``, averaged over ``targets``.
    The area under each set's curve is computed with ``sklearn.metrics.auc``
    and the maximum of the two areas is written to the module-level
    ``df_single_algo`` at ``[func, algo]``. Note that the maximum is taken
    over the two areas, not point by point along the curves.

    Parameters
    ----------
    func : function id, used to build the csv file names and as the row label.
    algo : algorithm name, matched against the ``ID`` column and used as the column label.
    targets : iterable of target values (defaults to the module-level ``targets``).
    evals : sequence of budgets (defaults to the module-level ``evals``).

    Returns
    -------
    None. The result is stored in ``df_single_algo``; ``func`` and ``algo``
    are printed as a progress message.

    Raises
    ------
    ValueError
        If a file does not contain exactly one row for ``algo`` at a budget.
    IndexError
        If a file has fewer than 29 columns.
    """
    run_cols = list(range(4, 29))     #25 runs
    n_runs = len(run_cols)

    def ecdf_points(samples, set_name):
        # Target-averaged ECDF value of `algo` at every budget for one set of runs.
        algo_rows = samples.loc[samples['ID'] == algo]
        points = []
        for feval in evals:
            row = algo_rows.loc[algo_rows['runtime'] == feval]
            if len(row) != 1:
                raise ValueError(
                    'expected exactly one row for ID=' + repr(algo) + ' at runtime=' + repr(feval)
                    + ' in ' + set_name + ' for function ' + str(func) + ', found ' + str(len(row))
                )
            run_values = [float(value) for value in row.iloc[0, run_cols]]
            tar_tot = 0
            for target in targets:
                hits = sum(1 for value in run_values if value <= target)
                tar_tot += hits / n_runs          #average over 25 runs
            points.append(tar_tot / len(targets))  #average over set of targets
        return points

    df1 = pd.read_csv(r'filepath1/FVSample-10DF'+str(func)+'.csv')  #set-1, 25 runs
    df2 = pd.read_csv(r'filepath2/FVSample-10DF'+str(func)+'.csv')  #set-2, 25 runs

    pts_algo1 = ecdf_points(df1, 'set-1')
    pts_algo2 = ecdf_points(df2, 'set-2')

    area = max(auc(evals, pts_algo1), auc(evals, pts_algo2))  #better of the two sets of runs
    # Single .loc call: the chained form df.loc[func][algo] = ... assigns into a
    # temporary Series and is silently lost when pandas Copy-on-Write is active.
    df_single_algo.loc[func, algo] = area
    print(func, algo)

In [13]:
#strip single quotes from column names, e.g. "('CMA', 'PSO')" -> "(CMA, PSO)"
def conv_str_tuple(df):
    """Remove every single-quote character from the column labels of ``df``, in place.

    Despite the function name, no tuple objects are created: each label stays
    a string, only the ``'`` characters are dropped. All new labels are
    computed before the frame is touched, so the frame is left unchanged if
    any label turns out not to be a string.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings; modified in place.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If a column label is not a ``str``.
    """
    renamed = {}
    for label in df.columns:
        if not isinstance(label, str):
            raise TypeError('column labels must be strings, got ' + repr(label))
        renamed[label] = label.replace("'", '')
    # One rename for all columns instead of one in-place rename per column.
    df.rename(columns=renamed, inplace=True)

In [14]:
# Result table for algorithm combinations: one row per function id, one column per
# pair returned by algo_combination(), initialised to 0.0.
# Built directly as a float frame instead of creating an all-NaN object frame and
# then calling fillna(0)/astype(float), which relies on pandas' deprecated
# implicit downcasting of object columns.
df_auc = pd.DataFrame(0.0, index=functions, columns=algo_combination())
#df_auc.head()

In [15]:
#calculate combined ecdf auc for algorithm combinations
def ecdf_auc(func, algo_tuple, targets=targets, evals=evals, plot=True):
    """Compute (and optionally plot) the ECDF area of a two-algorithm combination on one function.

    Reads ``filepath/FVSample-10DF<func>.csv``. For each of the two algorithms
    and every budget in ``evals``, the single row with ``ID == algorithm`` and
    ``runtime == budget`` is looked up and the values in column positions
    4..28 are taken as the 25 runs. The ECDF point for a budget is the
    fraction of runs with value ``<= target``, averaged over ``targets``.
    The combination curve is the point-wise maximum of the two algorithm
    curves; its area (``sklearn.metrics.auc`` over ``evals``) is written to
    the module-level ``df_auc`` at ``[func, algo_tuple]``.

    When ``plot`` is true, a two-panel figure (individual curves on the left,
    combination curve on the right) is saved to
    ``graphs_filepath/<func>_<algo1>_<algo2>.pdf`` and then closed.

    Parameters
    ----------
    func : function id, used to build the csv file name and as the row label.
    algo_tuple : pair ``(algo1, algo2)`` of algorithm names; also the column label.
    targets : iterable of target values (defaults to the module-level ``targets``).
    evals : sequence of budgets (defaults to the module-level ``evals``).
    plot : bool, default True
        Set to False to compute and store the area without creating a figure.

    Returns
    -------
    None. ``func`` and ``algo_tuple`` are printed as a progress message.

    Raises
    ------
    ValueError
        If the file does not contain exactly one row for an algorithm at a budget.
    IndexError
        If the file has fewer than 29 columns.
    """
    df = pd.read_csv(r'filepath/FVSample-10DF'+str(func)+'.csv')

    algo1 = algo_tuple[0]
    algo2 = algo_tuple[1]

    run_cols = list(range(4, 29))     #25 runs
    n_runs = len(run_cols)

    def ecdf_points(algo):
        # Target-averaged ECDF value of `algo` at every budget.
        algo_rows = df.loc[df['ID'] == algo]
        points = []
        for feval in evals:
            row = algo_rows.loc[algo_rows['runtime'] == feval]
            if len(row) != 1:
                raise ValueError(
                    'expected exactly one row for ID=' + repr(algo) + ' at runtime=' + repr(feval)
                    + ' in the file for function ' + str(func) + ', found ' + str(len(row))
                )
            run_values = [float(value) for value in row.iloc[0, run_cols]]
            tar_tot = 0
            for target in targets:
                hits = sum(1 for value in run_values if value <= target)
                tar_tot += hits / n_runs          #average over 25 runs
            points.append(tar_tot / len(targets))  #average over set of targets
        return points

    pts_algo1 = ecdf_points(algo1)   #for algorithm-1
    pts_algo2 = ecdf_points(algo2)   #for algorithm-2

    #only higher ecdf values of the two algorithms
    pts_pseudo = [max(p1, p2) for p1, p2 in zip(pts_algo1, pts_algo2)]

    area = auc(evals, pts_pseudo)
    # Single .loc call: the chained form df.loc[func][col] = ... assigns into a
    # temporary Series and is silently lost when pandas Copy-on-Write is active.
    df_auc.loc[func, algo_tuple] = area

    if plot:
        # The figure is created here; previously this line was commented out while
        # the plotting code below still referenced `fig` and `ax`.
        fig, ax = plt.subplots(1, 2, figsize=(10, 4))

        ax[0].plot(evals, pts_algo1, color='blue', marker='o')
        ax[0].plot(evals, pts_algo2, color='red', marker='^')
        ax[0].legend([algo1, algo2], prop={'size': 9})
        ax[0].set_title('ECDF of '+algo1+' and\n'+algo2+' on F'+str(func), fontsize=9)
        ax[0].set_xlabel('Function evaluations', fontsize=9)
        ax[0].set_ylabel('Proportion of runs', fontsize=9)
        ax[0].tick_params(axis='x', labelsize= 9)
        ax[0].tick_params(axis='y', labelsize= 9)

        ax[1].plot(evals, pts_pseudo, color='green', marker='o', alpha=0.6)
        ax[1].set_title('ECDF of combination on F'+str(func)+'\nAUC: '+str(round(area, 2)), fontsize=9)
        ax[1].set_xlabel('Function evaluations', fontsize=9)
        ax[1].set_ylabel('Proportion of runs', fontsize=9)
        ax[1].tick_params(axis='x', labelsize= 9)
        ax[1].tick_params(axis='y', labelsize= 9)

        fig.tight_layout()
        fig.savefig(r'graphs_filepath/'+str(func)+'_'+algo1+'_'+algo2+'.pdf')
        # Release the figure: this function is meant to be called once per
        # (function, combination) and open figures would otherwise accumulate.
        plt.close(fig)

    print(func, algo_tuple)

In [16]:
# Append the df_single_algo columns to the combination columns, aligned on the shared index.
# NOTE(review): this cell rebinds df_auc to a frame derived from itself, so it is not
# idempotent - running it a second time joins onto the already-joined frame, and pandas
# raises on overlapping column names when no suffixes are given.
# NOTE(review): later code parses the column labels as strings such as "(CMA, PSO)";
# presumably df_auc is reloaded or relabelled in a cell not shown here - TODO confirm.
df_auc = df_auc.join(df_single_algo)  #combination of 2 distinct algos + combination of two runs same algo

In [18]:
# Normalization: divide by the largest attainable area. An ECDF value of 1 at every
# budget integrates to evals[-1] - evals[0], which is 5000 for the budgets configured
# above; deriving it from evals keeps the normalization right if the budgets change.
df_auc_n = df_auc / (evals[-1] - evals[0])

In [19]:
#plot heatmap for normalized auc values
def hmap_auc(df):
    """Draw a heatmap of ``df``, save it as ``hmap.pdf`` and show it.

    Rows of ``df`` are labelled as function ids and columns as algorithms or
    algorithm combinations; every column gets a tick label.

    Parameters
    ----------
    df : pandas.DataFrame
        Values to plot (the notebook passes the normalized AUC table).

    Returns
    -------
    None
    """
    # Keep the Axes returned by seaborn under the name the rest of the function
    # uses; it was previously bound to `x`, leaving `ax` undefined here.
    ax = sns.heatmap(df, xticklabels=1, cmap="rocket_r", linewidths=0.5, linecolor='white')
    ax.set_xlabel('Algorithm (combinations)')
    ax.set_ylabel('Function ID')
    ax.set_title('Heatmap of algorithm (combination) performance on functions\n(measured by area under ECDF curve)')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=60, horizontalalignment='right')
    figure = plt.gcf()  # get current figure
    figure.set_size_inches(20,6)
    plt.savefig('hmap.pdf', bbox_inches='tight')
    plt.show()

In [21]:
# Labels dataframe reqd. for plotting improvement hmap: same columns as df_auc,
# one row per function id, initialised to 0.0.
# Built directly as a float frame instead of creating an all-NaN object frame and
# then calling fillna(0)/astype(float), which relies on pandas' deprecated
# implicit downcasting of object columns.
df_labels = pd.DataFrame(0.0, index=functions, columns=df_auc.columns)

In [23]:
#compare combination performance to better performing algo/run of the two on each function
def compare_algo(df_auc=df_auc, h=True):
    """Divide every AUC value by the AUC of the reference single-set result for that function.

    Column labels are strings. A label that does not start with ``(`` names a
    single algorithm and its values are divided by that algorithm's entry in
    the module-level ``df_single_set``. A label of the form ``(algo1, algo2)``
    names a combination and its values are divided by the larger of the two
    algorithms' ``df_single_set`` entries; in that case the module-level
    ``df_labels`` is also updated with 1 when ``algo1`` supplies the larger
    value (ties included) and 2 otherwise. A divisor of exactly 0 is replaced
    by 1e-6 to avoid division by zero.

    Parameters
    ----------
    df_auc : pandas.DataFrame
        AUC table indexed by function id; defaults to the module-level
        ``df_auc`` as it was when this function was defined. Not modified.
    h : bool, default True
        Must be true. The source defines no behaviour for a false value
        (the divisor was left unassigned), so that case is rejected explicitly.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_auc`` holding the ratios.

    Raises
    ------
    NotImplementedError
        If ``h`` is false.
    """
    if not h:
        # Previously this path failed with an unbound local variable (`div`).
        raise NotImplementedError('compare_algo only defines the comparison for h=True')

    df_combo = df_auc.copy()

    for col in df_combo.columns:
        single = not col.startswith('(')    #if algo is not tuple, then it is a single algo
        if not single:
            # split "(algo1, algo2)" into respective algo names, tolerating
            # the presence or absence of spaces around the comma
            parts = col.strip('()').split(',')
            a1 = parts[0].strip()
            a2 = parts[1].strip()

        for func in functions:
            if single:
                div = df_single_set.loc[func, col]    #compare with 1 set of runs
            else:
                first = df_single_set.loc[func, a1]
                second = df_single_set.loc[func, a2]
                div = max(first, second)    #sbs for that function
                # label for sbs position (1 denotes algo1 out of (algo1,algo2) is better)
                df_labels.loc[func, col] = 1 if div == first else 2

            if div==0.0:
                div+=0.000001
            # Single .loc call: the chained form df.loc[func][col] = ... assigns into a
            # temporary Series and is silently lost when pandas Copy-on-Write is active.
            df_combo.loc[func, col] = df_combo.loc[func, col]/div    #comparison
    return df_combo

In [24]:
#hmap of performance improvement of combination w.r.t. sbs per fn
def hmap_imp_perfn(df_combo):
    """Draw the improvement heatmap, save it as ``hmap_sbs_hardline.pdf`` and show it.

    The cells are coloured by ``df_combo`` on a colour scale clipped to
    [1.0, 1.10] and annotated with the values of the module-level
    ``df_labels``.

    Parameters
    ----------
    df_combo : pandas.DataFrame
        Ratios to plot; must have the same shape as ``df_labels``.

    Returns
    -------
    None
    """
    heatmap_options = dict(
        annot=df_labels.to_numpy(),
        xticklabels=1,
        cmap="rocket_r",
        linewidths=0.5,
        linecolor='white',
        vmin=1.0,
        vmax=1.10,
    )
    ax = sns.heatmap(df_combo, **heatmap_options)

    ax.set_xlabel('Algorithm (combinations)')
    ax.set_ylabel('Function ID')
    ax.set_title(
        'Heatmap of algorithm combination performance improvement with '
        'respect to\n the single best solver of the two (for each function) on functions\n'
        '(measured by area under ECDF curve)'
    )
    ax.set_xticklabels(ax.get_xticklabels(), rotation=60, horizontalalignment='right')

    current_figure = plt.gcf()
    current_figure.set_size_inches(20,6)
    plt.savefig('hmap_sbs_hardline.pdf', bbox_inches='tight')
    plt.show()
    

In [25]:
#bar_plot for performance
def bar_plot(df_auc_n=df_auc_n):
    """Plot the summed value of every column as a bar chart, save it as ``ranking.pdf`` and show it.

    Each column of ``df_auc_n`` (an algorithm or combination) is summed over
    all rows (functions); the bars are drawn in ascending order of that sum
    and labelled with the column names.

    Parameters
    ----------
    df_auc_n : pandas.DataFrame
        Normalized AUC table; defaults to the module-level ``df_auc_n`` as it
        was when this function was defined.

    Returns
    -------
    None
    """
    #cumulative performance across fns for each algo/combination, ascending order
    performance = df_auc_n.sum().sort_values()

    # One tick per column actually present; a fixed count of 45 made plt.bar
    # fail whenever the number of columns differed.
    positions = np.arange(len(performance))

    plt.bar(positions, performance.to_numpy())
    plt.xlabel('Algorithm combinations', fontsize=7)
    plt.ylabel('Performance measure', fontsize=7)
    plt.title('Overall performance of algorithms across functions', fontsize=7)
    plt.xticks(positions, list(performance.index), rotation=90, fontsize=6)
    plt.yticks(fontsize=7)
    figure = plt.gcf()  # get current figure
    figure.set_size_inches(8,4)
    plt.savefig(r'ranking.pdf', bbox_inches='tight')
    plt.show()
    