In diesem Notebook befinden sich alle Methoden für die Erstellung der Diagramme und Graphen für die History-Objekte.
Außerdem sind Funktionen enthalten, mit denen History-Objekte aus mehreren Ordnern eingelesen werden können.

In [4]:
import os
import math
import numpy as np
import pickle
import matplotlib.pyplot as plt
import history as ht

In [5]:
# Figure size passed as ``figsize`` to ``plt.figure`` by ``plot_graph``.
graph_figsize = (4.6, 3.1)

def get_best_history(histories):
    """Return the history with the best accuracy at its validation-loss minimum.

    For every history the epoch with the lowest ``val_loss`` is located and
    the ``val_accuracy`` of that epoch is used as its score. If the history
    also has an ``eval_test`` entry, the score is the mean of that validation
    accuracy and ``eval_test[1]``.

    Parameters
    ----------
    histories : list of history objects
        Every object must expose a ``history`` dict containing the keys
        ``'val_loss'`` and ``'val_accuracy'``.

    Returns
    -------
    best_history : history object
        The element of ``histories`` with the highest score; the first one
        wins if several share the highest score.

    Raises
    ------
    ValueError
        If ``histories`` is empty.
    """
    if len(histories) == 0:
        raise ValueError('histories must contain at least one history object')

    # Scores are kept in float32, like the metrics handled elsewhere here.
    acc_values = np.zeros(len(histories), dtype='float32')
    for i, history in enumerate(histories):
        # np.argmin yields the first position of the smallest validation loss
        min_loss_index = int(np.argmin(history.history['val_loss']))
        acc_values[i] = history.history['val_accuracy'][min_loss_index]
        if "eval_test" in history.history:
            # Blend in the test accuracy: mean of validation and test score
            acc_values[i] += history.history['eval_test'][1]
            acc_values[i] = acc_values[i] / 2
    return histories[int(np.argmax(acc_values))]


def average_history(histories, template_name, epoch_limit=None):
    """Average the metrics of several histories into one history object.

    The per-epoch metrics (``accuracy``, ``recall``, ``precision``, ``loss``
    and their ``val_`` counterparts) are averaged epoch by epoch, and
    ``traintime`` is averaged over all histories. The ``eval_val`` and
    ``eval_test`` entries are averaged over those histories that contain an
    ``eval_test`` entry. The result is written into the object returned by
    ``ht.load_history(template_name)``.

    Parameters
    ----------
    histories : list of history objects
        Must not be empty. Only the epochs available in every history
        (judged by the length of ``'val_loss'``) are averaged.
    template_name : str
        Passed to ``ht.load_history``; the loaded object receives the
        averaged values. If evaluation results are written, its
        ``'eval_val'`` and ``'eval_test'`` entries must already exist and
        support item assignment at positions 0 and 1.
    epoch_limit : int, optional
        Upper bound for the number of epochs that are averaged.

    Returns
    -------
    avg_history : history object
        Template history whose metric entries were replaced by lists of
        averaged ``float32`` values.

    Raises
    ------
    IndexError
        If ``histories`` is empty.
    """
    epoch_metrics = ('accuracy', 'recall', 'precision', 'loss',
                     'val_accuracy', 'val_recall', 'val_precision', 'val_loss')
    histories_num = len(histories)

    # histories[0] raises IndexError for an empty list; afterwards shrink to
    # the shortest run so that every averaged epoch exists in all histories.
    length = len(histories[0].history['val_loss'])
    for history in histories:
        length = min(length, len(history.history['val_loss']))
    if epoch_limit is not None and length > epoch_limit:
        length = epoch_limit

    sums = {name: np.zeros(length, dtype='float32') for name in epoch_metrics}
    traintime = np.array([0], dtype="float32")
    eval_val_acc = np.array([0], dtype="float32")
    eval_test_acc = np.array([0], dtype="float32")
    eval_val_loss = np.array([0], dtype="float32")
    eval_test_loss = np.array([0], dtype="float32")
    # Number of histories that actually carry evaluation results
    eval_count = 0

    for history in histories:
        traintime += history.history['traintime'][0]
        if "eval_test" in history.history:
            eval_count += 1
            eval_val_loss += history.history['eval_val'][0]
            eval_val_acc += history.history['eval_val'][1]
            eval_test_loss += history.history['eval_test'][0]
            eval_test_acc += history.history['eval_test'][1]

        for name in epoch_metrics:
            # Vectorised accumulation of the first `length` epochs
            sums[name] += np.asarray(history.history[name][:length])

    avg_history = ht.load_history(template_name)
    if eval_count:
        # Divide by the histories that contributed, not by all histories,
        # otherwise runs without evaluation would drag the mean towards 0.
        avg_history.history['eval_val'][0] = list(eval_val_loss / eval_count)
        avg_history.history['eval_val'][1] = list(eval_val_acc / eval_count)
        avg_history.history['eval_test'][0] = list(eval_test_loss / eval_count)
        avg_history.history['eval_test'][1] = list(eval_test_acc / eval_count)

    avg_history.history['traintime'] = list(traintime / histories_num)
    for name in epoch_metrics:
        avg_history.history[name] = list(sums[name] / histories_num)

    return avg_history


def plot_graph(metric, histories, labels, loc, titel, colors,
               window=None, metric2=None, epoch_window=None):
    """Plot one metric (optionally two) of all given histories as line graph.

    The first metric is drawn as a solid, labelled line per history. If
    ``metric2`` is given, it is drawn dashed in the same colour and without
    its own legend entry. The visible range of the y-axis and x-axis can be
    restricted by ``window`` and ``epoch_window``.

    Parameters
    ----------
    metric : str
        Key of ``history.history`` to plot.
    histories : list of history objects
    labels : str list
        Legend label per history.
    loc : str
        Legend location, passed to ``plt.legend``.
    titel : str
        Title of the plot.
    colors : color str list
        Line colour per history.
    window : list of 2 floats, optional
        ``[bottom, top]`` limits of the y-axis.
    metric2 : str, optional
        Second key of ``history.history`` to plot.
    epoch_window : list of 2 floats, optional
        ``[left, right]`` limits of the x-axis.
    """
    plt.figure(figsize=graph_figsize, dpi=90)
    plt.title(titel)
    # The axis label is the same for every history, so set it once up front
    plt.ylabel(metric if metric2 is None else metric + ' & ' + metric2)
    plt.xlabel('Epoch')
    for i, history in enumerate(histories):
        plt.plot(history.history[metric], label=labels[i], color=colors[i])
        if metric2 is not None:
            plt.plot(history.history[metric2], linestyle='dashed', color=colors[i])
    plt.legend(loc=loc, prop={'size': 10})
    if window is not None:
        plt.ylim(top=window[1], bottom=window[0])  # displayed area
    if epoch_window is not None:
        plt.xlim(left=epoch_window[0], right=epoch_window[1])
    plt.show()
    

def autolabel(ax, rects):
    """Write the height of every bar as text just above the bar.

    Parameters
    ----------
    ax : axes object
        Must provide ``text``; receives one label per bar.
    rects : iterable of bar objects
        Each must provide ``get_x``, ``get_width`` and ``get_height``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        center_x = bar.get_x() + bar.get_width()/2.
        # Height rounded to two decimals, printed without trailing zeros
        caption = '%g' % round(bar_height, 2)
        # Anchor the label 1 % above the top edge of the bar
        ax.text(center_x, 1.01*bar_height, caption, ha='center', va='bottom')
        
    
def evaluate_bar_plot(histories, labels, metric, window=None, figsize=None, titel_add=None):
    """Draw grouped bars comparing validation and test results per history.

    For every history the epoch with the lowest ``val_loss`` is located. The
    validation bar shows ``val_accuracy`` (``metric == 'accuracy'``) or
    ``val_loss`` (``metric == 'loss'``) of that epoch; the test bar shows the
    matching value from ``history.history['eval_test']``.

    Parameters
    ----------
    histories : list of history objects
        Each ``history['eval_test']`` must hold ``[loss, accuracy]`` where
        both entries are indexable (their first element is plotted).
    labels : str list
        Tick label per history on the x-axis.
    metric : str
        Either ``'accuracy'`` or ``'loss'``.
    window : list of 2 floats, optional
        ``[bottom, top]`` limits of the y-axis.
    figsize : list of 2 floats, optional
        Figure size; defaults to ``(7, 1.5)``.
    titel_add : str, optional
        Text placed in front of the title, separated by ``': '``.

    Raises
    ------
    ValueError
        If ``metric`` is neither ``'accuracy'`` nor ``'loss'``.
    """
    # Pick the validation series and the eval_test position that belong to
    # the requested metric, so both bar groups show the same quantity.
    if metric == 'accuracy':
        val_key, eval_index = 'val_accuracy', 1
    elif metric == 'loss':
        val_key, eval_index = 'val_loss', 0
    else:
        raise ValueError("metric must be 'accuracy' or 'loss', got %r" % (metric,))

    test = []
    vali = []
    for history in histories:
        min_loss_index = int(np.argmin(history.history['val_loss']))
        vali.append(history.history[val_key][min_loss_index])
        test.append(history.history['eval_test'][eval_index][0])

    x = np.arange(len(labels))
    width = 0.40
    if figsize is None:  # fall back to the default size of the figure
        figsize = (7, 1.5)
    _, ax = plt.subplots(figsize=figsize, dpi=90)

    # Test bars left, validation bars right of each tick position
    rects1 = ax.bar(x - width/2, test, width, label='Test', color='tab:purple')
    rects2 = ax.bar(x + width/2, vali, width, label='Vali.', color='tab:orange')
    plt.ylabel(metric)
    titel = 'Comparison Vali. & Test'
    if titel_add is not None:
        titel = titel_add + ': ' + titel
    plt.title(titel)
    plt.xticks(x, labels)
    plt.legend(loc='lower right', prop={'size': 10})
    if window is not None:  # if the interval should be trimmed on the y-axis
        plt.ylim(top=window[1], bottom=window[0])
    autolabel(ax, rects1)
    autolabel(ax, rects2)
    plt.show()


def bar_plot(histories, labels, titel, search, unit, colors, window=None):
    """Draw one bar per history for a single value taken from each history.

    Depending on ``search`` the value is an evaluation result
    (``'eval'``), the minimum (``'min'``) or the maximum (``'max'``) of the
    series ``history.history[unit]``. Bars whose ``search``/``unit``
    combination is not recognised stay at 0.

    Parameters
    ----------
    histories : list of history objects
    labels : str list
        Bar label per history.
    titel : str
        Title of the plot.
    search : str
        ``'eval'``, ``'min'`` or ``'max'``.
    unit : str
        For ``'eval'`` one of ``'eval_val_loss'``, ``'eval_val_acc'``,
        ``'eval_test_loss'``, ``'eval_test_acc'``; otherwise a key of
        ``history.history``.
    colors : color str list
    window : list of 2 floats
        ``[bottom, top]`` limits of the y-axis.
    """
    # unit name -> (history key, position inside that entry)
    eval_lookup = {
        'eval_val_loss': ('eval_val', 0),
        'eval_val_acc': ('eval_val', 1),
        'eval_test_loss': ('eval_test', 0),
        'eval_test_acc': ('eval_test', 1),
    }

    values = np.zeros(len(histories))
    for idx, hist in enumerate(histories):
        if search == 'eval':
            if unit in eval_lookup:
                key, pos = eval_lookup[unit]
                values[idx] = hist.history[key][pos][0]
        elif search == 'min':
            values[idx] = np.amin(hist.history[unit])
        elif search == 'max':
            values[idx] = np.amax(hist.history[unit])

    fig = plt.figure(figsize=(3.58,2.345), dpi=90)
    ax = fig.add_axes([0,0,1,1])
    plt.title(titel)
    if unit == 'traintime':
        plt.ylabel('traintime in sec')
    else:
        plt.ylabel(unit)
    bars = ax.bar(labels, values, color=colors)
    if window is not None:
        plt.ylim(top=window[1], bottom=window[0])
    # Annotate every bar with its value rounded to three decimals
    for bar in bars:
        bar_height = bar.get_height()
        center_x = bar.get_x() + bar.get_width()/2.
        ax.text(center_x, 1.01*bar_height, '%g' % round(bar_height, 3),
                ha='center', va='bottom')
    plt.show()
    
    
def bar_plot_loss_min(histories, labels, titel, unit, colors, window=None):
    """Draw one bar per history showing a metric at the validation-loss minimum.

    For every history the epoch with the lowest ``val_loss`` is located and
    the value of ``history.history[unit]`` at that epoch is plotted. The
    plotted values are also printed.

    Parameters
    ----------
    histories : list of history objects
    labels : str list
        Bar label per history.
    titel : str
        Title of the plot.
    unit : str
        Key of ``history.history`` whose value is shown.
    colors : color str list
    window : list of 2 floats
        ``[bottom, top]`` limits of the y-axis.
    """
    values = np.zeros(len(histories))
    for idx, hist in enumerate(histories):
        val_losses = hist.history['val_loss']
        best_epoch = val_losses.index(np.amin(val_losses))
        values[idx] = hist.history[unit][best_epoch]

    fig = plt.figure(figsize=(3.58,2.345), dpi=90)
    ax = fig.add_axes([0,0,1,1])
    plt.title(titel)
    if unit == 'traintime':
        plt.ylabel('traintime in sec')
    else:
        plt.ylabel(unit)
    bars = ax.bar(labels, values, color=colors)
    print('values: ' + str(values))
    if window is not None:
        plt.ylim(top=window[1], bottom=window[0])  # visible range on the y-axis
    # Annotate every bar with its value rounded to three decimals
    for bar in bars:
        bar_height = bar.get_height()
        center_x = bar.get_x() + bar.get_width()/2.
        ax.text(center_x, 1.01*bar_height, '%g' % round(bar_height, 3),
                ha='center', va='bottom')
    plt.show()
    
    
def load_histories_from_folder(folderpatch):
    """Load all pickled history objects stored in one folder.

    The folder is looked up below ``../histories/`` relative to the current
    working directory. Every regular file in it is unpickled and the file
    name is stored in the loaded object under ``history['filename']``.
    Sub-directories are skipped.

    Parameters
    ----------
    folderpatch : str
        Folder path relative to ``../histories/``.

    Returns
    -------
    histories : list of history objects
        Ordered by file name.
    """
    folder = os.path.join('..', 'histories', folderpatch)
    histories = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            # Directories cannot be opened as a pickle file
            continue
        # NOTE(security): unpickling can execute arbitrary code; only load
        # history files that come from a trusted source.
        with open(path, 'rb') as handle:
            h = pickle.load(handle)
        h.history['filename'] = filename
        histories.append(h)
    return histories


def get_histories_of_subfolders(subfolders, folder):
    """Load the histories of several subfolders of one folder.

    Every subfolder is loaded with ``load_histories_from_folder`` using the
    path ``folder + '/' + subfolder``.

    Parameters
    ----------
    subfolders : iterable of str
        Names of the subfolders inside ``folder``.
    folder : str
        Folder that contains the subfolders.

    Returns
    -------
    folder_hist_list : list of lists of history objects
        One list of histories per subfolder, in the order of ``subfolders``.
    """
    return [load_histories_from_folder(folder + '/' + name) for name in subfolders]


def get_best_and_avgs(folder_hist_list, template_path, epoch_limit=None):
    """Determine the best and the averaged history for every model variant.

    For each list of histories the best one is chosen with
    ``get_best_history`` and its file name is printed. Both the best history
    and the complete list are then passed through ``average_history``.

    Parameters
    ----------
    folder_hist_list : list of lists of history objects
        One list of histories per model variant.
    template_path : str
        Forwarded to ``average_history`` as template name.
    epoch_limit : int
        Forwarded to ``average_history``.

    Returns
    -------
    best : list of best history objects
    avgs : list of average history objects
    """
    best, avgs = [], []
    for variant_histories in folder_hist_list:
        winner = get_best_history(variant_histories)
        print(winner.history['filename'])
        # The winner is averaged on its own, so it gets the same template
        # and epoch limit treatment as the averaged histories.
        best.append(average_history([winner], template_path, epoch_limit=epoch_limit))
        avgs.append(average_history(variant_histories, template_path, epoch_limit=epoch_limit))
    return best, avgs
