In [1]:
import pickle
import os
from itertools import chain, groupby
from operator import itemgetter

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

%matplotlib inline

In [58]:
def get_f_names(folder, prefix = '', postfix = '.pickle'):
    return [f for f in list(os.walk(folder))[0][-1] if f.startswith(prefix) and f.endswith(postfix)]

def load_data(folder, file_name):
    with open(os.path.join(folder, file_name), 'rb') as read_pickle:
        return pickle.load(read_pickle)

def get_init_first_second(data):
    first = 'res'
    second = 'ord'

    init = data[first][0][0][0] # first -> history -> initial_step -> value
    first_res = data[first][1][-1][0] # second -> hall_of_fame -> best -> value
    second_res = data[second][1][-1][0] # second -> hall_of_fame -> best -> value

    first_res_percent = 1 - first_res / init
    total_percent = 1 - second_res / init
    second_res_percent = total_percent - first_res_percent

    # return init, first_res, second_res
    return 100, first_res_percent * 100, second_res_percent * 100, total_percent * 100

def agg_data(data):
    df = pd.DataFrame([[grouper, *get_init_first_second(run)]
     for grouper, group in groupby(data, itemgetter('modification'))
     for run in group], columns=['modification', 'init', 'res', 'ord', 'total'])

    grouped = df.groupby('modification').agg(dict(init=('count', 'mean'), res=('mean', 'std'), ord=('mean', 'std'), total=('mean', 'std')))
    return grouped

def get_aggregation(folder, f_names):
    data = [load_data(folder, f) for f in f_names]
    agg = agg_data(data)
    return agg

In [59]:
folder = 'results/1'
files = get_f_names(folder)
df_mod = get_aggregation(folder, files)
display(df_mod)

Unnamed: 0_level_0,init,init,res,res,ord,ord,total,total
Unnamed: 0_level_1,count,mean,mean,std,mean,std,mean,std
modification,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
aspiration_ge,4,100.0,1.266222,0.541361,28.859499,1.242807,30.125721,0.803436
no,4,100.0,0.082072,0.071804,34.853183,1.381917,34.935254,1.44786
simple_id,4,100.0,0.155025,0.063616,35.546234,1.568608,35.701258,1.520311
stochastic_selection,4,100.0,0.068393,0.075013,35.443644,1.16635,35.512037,1.187617
stochastic_tabu,4,100.0,0.036476,0.051585,37.281142,0.838573,37.317618,0.814017
use_ga,4,100.0,1.430696,1.058812,13.027217,6.837183,14.457913,7.548131


In [60]:
def get_repeated_data(data):
    first = 'res'
    second = 'ord'

    init = data['opt'][0][first][0][0][0] # optimisation -> first_iter -> first -> history -> initial_step -> value
    res = data['opt'][-1][second][1][-1][0] # optimisation -> last_iter -> second -> hall_of_fame -> best -> value

    return 100, 100 - (res / init) * 100

def agg_repeated_data(data):
    df = pd.DataFrame([[grouper, *get_repeated_data(run[1])]
     for grouper, group in groupby(data, itemgetter(0))
     for run in group], columns=['modification', 'init', 'total'])

    grouped = df.groupby('modification').agg(dict(init=('count', 'mean'), total=('mean', 'std')))
    return grouped

def get_repeated_aggregation(folder, f_names):
    data = [(n, load_data(folder, f)) for n, f in chain(*f_names)]
    agg = agg_repeated_data(data)
    return agg

In [61]:
folder = 'results/'
files = get_f_names(folder, 'pure') + get_f_names(folder, 'mod')
df_best = get_aggregation(folder, files)
# display(df_best / df_best.loc[False, ('total', 'mean')] * df_mod.loc['no', ('total', 'mean')])
display(df_best)

Unnamed: 0_level_0,init,init,res,res,ord,ord,total,total
Unnamed: 0_level_1,count,mean,mean,std,mean,std,mean,std
modification,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
False,4,100.0,0.117005,0.086076,31.089815,2.602242,31.20682,2.534156
True,4,100.0,0.03343,0.137082,31.109316,0.822764,31.142746,0.714816


In [62]:
folder = 'results/'
files = [[('no', i ) for i in get_f_names(folder, 'gen-tabu')], [('iter', i) for i in get_f_names(folder, 'iter-gen-tabu')]]
df_iter = get_repeated_aggregation(folder, files)
display(df_iter / df_iter.loc['no', ('total', 'mean')] * df_mod.loc['no', ('total', 'mean')])

Unnamed: 0_level_0,init,init,total,total
Unnamed: 0_level_1,count,mean,mean,std
modification,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
iter,3.647075,91.176883,37.746195,1.542741
no,3.647075,91.176883,34.935254,1.952533
