# Algorithm Performance Profile

Produce an algorithm profile following an experiment with `doframework`. 

In [1]:
import os
import yaml
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

from doframework.core.storage import Storage
from doframework.core.inputs import get_configs
from doframework.core.gp import find_modal
from doframework.flow.metrics import files_from_solution

# Configs

In [2]:
# Locate the doframework configs file in the user's home directory.
# expanduser('~') honours $HOME when it is set and otherwise falls back to the
# platform's home-directory lookup, so the cell does not raise KeyError on
# systems where HOME is undefined (e.g. Windows).
configs_root = os.path.expanduser('~')
configs_file = 'ibm_configs.yaml'
configs_path = os.path.join(configs_root,configs_file)

# Load the configs; later cells pass them to Storage and test them for an 's3' key.
configs = get_configs(configs_path)

# Storage

In [3]:
# Instantiate the doframework Storage object from the loaded configs.
storage = Storage(configs)

In [4]:
# Bucket lookup; later cells index it by 'solutions', 'objectives_dest' and 'data_dest'.
buckets = storage.buckets()

In [5]:
# Collect the entries of the solutions bucket.
# NOTE(review): 'json' is presumably a file-extension filter — confirm against Storage.get_all.
solution_files = storage.get_all(buckets['solutions'],'json')

# Problem Type

Specify whether the problems in the experiment are minimum (`'min'`) or maximum (`'max'`) problems.

In [6]:
# Optimum type to profile: used as the key into each solution's 'solution' entry
# and shown in the plot title.
opt = 'min'

# Fetch Results

In [7]:
# Collect one record per solution file:
# [solution, objective, data, dimension, performance, success]
rel_perfs = []

for solution_file in solution_files:

    # The entry exposes its name as .key when the configs have an 's3' key
    # and as .name otherwise.
    solution_name = solution_file.key if 's3' in configs else solution_file.name

    solution = json.load(storage.get(buckets['solutions'],solution_name))

    # names of the objective and data files associated with this solution
    assoc_files = files_from_solution(solution_name)

    # get relevant objective
    objective_name = assoc_files['objective']
    objective = json.load(storage.get(buckets['objectives_dest'],objective_name))

    # get relevant data
    data_name = assoc_files['data']
    data = pd.read_csv(storage.get(buckets['data_dest'],data_name))
    # every column but the last counts towards the problem dimension
    dim = data.iloc[:,:-1].shape[-1]

    solved = solution['solution'][opt] != 'FAILED'
    # performance stays NaN for failed solutions and for undefined scores
    rel_perf = np.nan

    if solved:

        solution_val = solution['solution'][opt]['value']
        true_min_val = objective['optimum']['min']['value']
        true_max_val = objective['optimum']['max']['value']
        value_range = true_max_val-true_min_val

        # A constant objective has zero range, so the normalized score is
        # undefined; keep NaN instead of aborting the loop with a
        # ZeroDivisionError.
        if value_range != 0:
            rel_perf = (solution_val-true_min_val)/value_range

    rel_perfs.append([solution_name,objective_name,data_name,dim,rel_perf,solved])
            

# Performance

In [8]:
# Tabulate the collected records, one row per solution file.
perf_columns = ['solution','objective','data','dimension','performance','success']
df = pd.DataFrame(data=rel_perfs,columns=perf_columns)

In [9]:
# Preview the first rows, leaving out the objective and data file names.
df.loc[:,['solution','dimension','performance','success']].head()

Unnamed: 0,solution,dimension,performance,success
0,solution_80kglwzy_h48iairw_d5i4i6hy.json,5,0.53573,True
1,solution_4hvp7lb4_6573fo52_h4ws714j.json,5,0.069162,True
2,solution_4hvp7lb4_7afa38ve_9vqmyg7h.json,5,0.069162,True
3,solution_80kglwzy_yr5b9tn6_hy7hintd.json,5,0.53573,True
4,solution_egmxpel4_e6g8v0py_x3lovfq3.json,5,1.0,True


In [10]:
# Count all reviewed solutions and those with a defined performance score.
solution_num = df.shape[0]
solution_good = int(df.performance.notna().sum())

print('Reviewed {} solutions.'.format(solution_num))
if solution_num > 0:
    print('Of those, {:.2f}% succsessful.'.format(100*(solution_good/solution_num)))
else:
    # No solution files were found: the success rate is undefined, so skip
    # the division instead of raising ZeroDivisionError.
    print('No solutions found; success rate is undefined.')

Reviewed 9 solutions.
Of those, 100.00% succsessful.


# Profile

Let's see how performance scores are distributed.

The score of a predicted optimum $\hat{x}^*$ relative to the true optimum is

$$\text{score}(\hat{x}^*) = \frac{f(\hat{x}^*)-f(x_\min)}{f(x_\max)-f(x_\min)}$$

In [None]:
# Plot only the defined scores: failed solutions carry NaN performance, and the
# other cells of this notebook filter them out before using the scores.
scores = df.performance.dropna()

fig, ax = plt.subplots()
ax.hist(scores,density=True,bins=20)
# The solution count is taken from the plotted scores, so this cell does not
# depend on a counter defined in another cell.
ax.set_title('Algorithm Profile:\n{} problem, {} solutions, dimensions {}-{}'.\
             format(opt,len(scores),min(df.dimension),max(df.dimension)))
ax.set_xlabel(r'$(f(\hat{x}^*)-f(x_\min))\,/\,(f(x_\max)-f(x_\min))$',fontsize=16)
ax.set_ylabel(r'Density',fontsize=16)
plt.show()

In [None]:
# Summary statistics over the defined (non-NaN) performance scores.
valid_scores = df.performance.dropna()

mean_perf = valid_scores.mean()
print('Mean Performance: {:.2f}'.format(mean_perf))

median_perf = valid_scores.median()
print('Median Performance: {:.2f}'.format(median_perf))

# find_modal comes from doframework.core.gp and receives the NaN-free scores
modal_perf = find_modal(valid_scores)
print('Distribution Modal: {:.2f}'.format(modal_perf))

# Probability

Estimate the probability 
$$Pr[f(\hat{x}^*) < f(x_{\min}) + \epsilon \cdot (f(x_{\max})-f(x_{\min})) \, \vert \, \Omega]$$
for the algorithm's predicted optimum $\hat{x}^*$ of $f$ in $\Omega$ (written here for a minimum problem).

In [None]:
# Fit a Gaussian kernel density estimate to the defined (non-NaN) scores.
scores = df.performance.dropna()
kde = gaussian_kde(scores)

# Tolerance, as a fraction of the objective's range f(x_max)-f(x_min).
epsilon = 0.4

# The score is normalized so that 0 is the true minimum and 1 the true maximum.
# A 'min' problem is therefore within tolerance when the score lies in
# [0, epsilon]; for any other problem type (i.e. 'max') it is [1-epsilon, 1].
if opt == 'min':
    lower, upper = 0, epsilon
else:
    lower, upper = 1-epsilon, 1

print('Estimated performance probability: {:.2f}'.format(kde.integrate_box_1d(lower,upper)))