In [10]:
import glob
import numpy as np
import tensorflow as tf

from tensorboard.backend.event_processing import event_accumulator

In [4]:
def loadTensorboardTensorData(enty_tag, folder_path):
    """
    Loads the tensor data of the log entry with the given entry tag from the given path.

    If the tag cannot be read as given, it is looked up a second time with the
    "SingleGPUTraining/" prefix. The last logged entry is always dropped from the result.

    :param enty_tag: (String) The tag of the entry from which to get the data.
    :param folder_path: (String) The path to the log dir.
    :return: data_per_step: (Tuple) (step_nums, data) The step numbers and the data for each step,
             both without their last element.
    :raises KeyError: If the tag is found neither as given nor with the "SingleGPUTraining/" prefix.
    """
    # NOTE(review): per the TensorBoard docs a size guidance of 0 keeps every event of
    # that kind instead of a sampled subset - confirm against the installed version.
    ea = event_accumulator.EventAccumulator(folder_path, size_guidance={'tensors': 0})
    ea.Reload()
    try:
        # Every tensor event unpacks into three fields; the first one is not needed here.
        _, step_nums, data = zip(*ea.Tensors(enty_tag))
    except (KeyError, ValueError):
        # KeyError: unknown tag; ValueError: the tag exists but holds no events, so the
        # unpacking of the empty zip fails. Anything else (e.g. KeyboardInterrupt) must
        # not be swallowed by the fallback.
        _, step_nums, data = zip(*ea.Tensors("SingleGPUTraining/"+enty_tag))
    return (step_nums[:-1], data[:-1])

In [13]:
def loadModelTensorboardData(dir_prefix, dir_suffix, model_xFolds, loss_enty_tag ="loss", obj_loss_enty_tag = "obj_loss", reg_loss_enty_tag = "reg_loss", acc_enty_tag = None):
    """
    Loads the tensorboard data of the model given by dir_prefix, dir_suffix and its xFolds.

    The log dir of an xFold is dir_prefix + str(xFold) + dir_suffix. The results are stored
    by the position of the xFold in model_xFolds, so the xFold ids neither have to start
    at 0 nor have to be consecutive.

    :param dir_prefix: (String) The prefix of the dir from which to get the data.
    :param dir_suffix: (String) The suffix of the dir from which to get the data.
    :param model_xFolds: (Array of Integer) The xFolds of the models.
    :param loss_enty_tag: (String): The tag of the overall losses in the tensorboard data.
    :param obj_loss_enty_tag: (String): The tag of the objective losses in the tensorboard data.
    :param reg_loss_enty_tag: (String): The tag of the regularization losses in the tensorboard data.
    :param acc_enty_tag: (String): The tag of the accuracies in the tensorboard data. If None, no accuracies are loaded.
    :return: (Tuple) (steps_per_xFold, losses_per_xFold, obj_losses_per_xFold, reg_losses_per_xFold, accs_per_xFold)
             with one entry per xFold. The accuracy lists stay empty if acc_enty_tag is not given.
             None if the steps of an xFold differ from the steps of the xFold before it.
    """
    num_xFolds = len(model_xFolds)
    steps_per_xFold = [[] for _ in range(num_xFolds)]
    losses_per_xFold = [[] for _ in range(num_xFolds)]
    obj_losses_per_xFold = [[] for _ in range(num_xFolds)]
    reg_losses_per_xFold = [[] for _ in range(num_xFolds)]
    accs_per_xFold = [[] for _ in range(num_xFolds)]

    for fold_idx, xFold in enumerate(model_xFolds):
        log_dir = dir_prefix + str(xFold) + dir_suffix

        # The steps of the overall loss define the order for every other series of this xFold.
        steps, losses = loadTensorboardTensorData(loss_enty_tag, log_dir)
        sort_index = np.argsort(np.array(steps))
        _, obj_losses = loadTensorboardTensorData(obj_loss_enty_tag, log_dir)
        _, reg_losses = loadTensorboardTensorData(reg_loss_enty_tag, log_dir)

        if acc_enty_tag:
            _, accs = loadTensorboardTensorData(acc_enty_tag, log_dir)
            accs = np.array(accs)[sort_index]
        else:
            # Placeholder of matching length so that the zip below is not cut short.
            accs = np.zeros(len(steps))

        steps = np.array(steps)[sort_index]
        losses = np.array(losses)[sort_index]
        obj_losses = np.array(obj_losses)[sort_index]
        reg_losses = np.array(reg_losses)[sort_index]

        # array_equal also copes with step arrays of different length, where an
        # elementwise comparison followed by .all() would fail instead of reporting.
        if fold_idx > 0 and not np.array_equal(steps_per_xFold[fold_idx - 1], steps):
            print("Error xFolds lens do not match! ")
            return None

        steps_per_xFold[fold_idx] = steps

        for loss, obj_loss, reg_loss, acc in zip(losses, obj_losses, reg_losses, accs):
            # Each entry carries the raw tensor bytes and its dtype; only the first decoded value is kept.
            losses_per_xFold[fold_idx].append(tf.io.decode_raw(loss.tensor_content, loss.dtype).numpy()[0])
            obj_losses_per_xFold[fold_idx].append(tf.io.decode_raw(obj_loss.tensor_content, obj_loss.dtype).numpy()[0])
            reg_losses_per_xFold[fold_idx].append(tf.io.decode_raw(reg_loss.tensor_content, reg_loss.dtype).numpy()[0])
            if acc_enty_tag:
                accs_per_xFold[fold_idx].append(tf.io.decode_raw(acc.tensor_content, acc.dtype).numpy()[0])

    return steps_per_xFold, losses_per_xFold, obj_losses_per_xFold, reg_losses_per_xFold, accs_per_xFold

In [11]:
def loadPretextModelsEvalData(data_path, dataset_name, pretext_model_names, xFolds, steps_per_epoch, loss_enty_tag ="loss", obj_loss_enty_tag = "obj_loss", reg_loss_enty_tag = "reg_loss", acc_enty_tags = None):
    """
    Collects the tensorboard evaluation data of several pretext models.

    For every pretext model the logs are read from
    data_path/<pretext_model_name>/dataset_name/xFoldCrossVal_<xFold>/logs/eval.

    :param data_path: (String) The path from which to get the data.
    :param dataset_name: (String) The name of the dataset for which to get the data.
    :param pretext_model_names: (Array of Strings) The names of the pretext models for which to get the data.
    :param xFolds: (Array of Integer) The xFolds of the pretext models.
    :param steps_per_epoch: (Integer) The number of steps in an epoch, used to convert steps to epochs.
    :param loss_enty_tag: (String): The tag of the overall losses in the tensorboard data.
    :param obj_loss_enty_tag: (String): The tag of the objective losses in the tensorboard data.
    :param reg_loss_enty_tag: (String): The tag of the regularization losses in the tensorboard data.
    :param acc_enty_tags: The accuracy tags, looked up by pretext model name. If None, no accuracies are requested.
    :return: (Tuple) (epochs, losses, obj_losses, reg_losses, accs), each a list with one per-xFold entry for every pretext model.
    """
    all_epochs, all_losses, all_obj_losses, all_reg_losses, all_accs = [], [], [], [], []

    # Stays None for all models unless accuracy tags were handed in.
    acc_enty_tag = None
    eval_suffix = "/logs/eval"

    for pretext_model_name in pretext_model_names:
        fold_dir_prefix = "/".join([data_path, pretext_model_name, dataset_name, "xFoldCrossVal_"])

        if acc_enty_tags:
            acc_enty_tag = acc_enty_tags[pretext_model_name]

        fold_steps, fold_losses, fold_obj_losses, fold_reg_losses, fold_accs = loadModelTensorboardData(
            fold_dir_prefix,
            eval_suffix,
            xFolds,
            loss_enty_tag=loss_enty_tag,
            obj_loss_enty_tag=obj_loss_enty_tag,
            reg_loss_enty_tag=reg_loss_enty_tag,
            acc_enty_tag=acc_enty_tag,
        )

        # Express the logged steps in epochs.
        all_epochs.append(np.divide(np.array(fold_steps), steps_per_epoch))
        all_losses.append(fold_losses)
        all_obj_losses.append(fold_obj_losses)
        all_reg_losses.append(fold_reg_losses)
        all_accs.append(fold_accs)

    return all_epochs, all_losses, all_obj_losses, all_reg_losses, all_accs

In [7]:
# Don't ask me why I used text files.
def _parseTargetModelEvalTxtFile(file):
    """
    Helper: reads the best-loss stats out of a single eval.txt file.

    :param file: (String) The path of the eval.txt file.
    :return: (Dictionary) The parsed stats, or None unless each of the four "Best ..." entries occurs exactly once.
    """
    with open(file) as f:
        content = f.readlines()

    stats = {}
    found_entries = 0
    for line in content:
        if "Best Loss Epoch" in line:
            stats["epoch"] = int(line.split(":")[1].strip())
            found_entries += 1
        elif "Best Loss Step" in line:
            stats["step"] = int(line.split(":")[1].strip())
            found_entries += 1
        elif "Best Losses" in line:
            # The value is a bracketed list: overall, objective and regularization loss.
            losses = line.split(":")[1].strip()[1:-1].split(",")
            stats["loss"] = float(losses[0].strip())
            stats["obj_loss"] = float(losses[1].strip())
            stats["reg_loss"] = float(losses[2].strip())
            found_entries += 1
        elif "Best Acc" in line:
            stats["loss_acc"] = float(line.split(":")[1].strip())
            # The overall best accuracy is the maximum over all per-epoch accuracy lines.
            stats["acc"] = max(float(acc_line.split(":")[1].strip()) for acc_line in content if "Acc for epoch" in acc_line)
            found_entries += 1

    if found_entries != 4:
        return None
    return stats


def loadTargetModelEvalTxtFile(path, target_model_name, dataset_name, pretext_model_name, pretext_model_xFold):
    """
    Loads the stats of the target models trained on every specified pretext training step for the given dataset.

    The stats are read from the files
    path/target_model_name/dataset_name/pretext_model_name/loadedxFoldCrossVal_<xFold>/checkpoint_<step>/txtlogs/eval.txt.

    :param path: (String) The path from which to get the stats.
    :param target_model_name: (String) The name of the target model for which to get the data for every trained checkpoint.
    :param dataset_name: (String) The name of the dataset for which to get the data for every trained checkpoint.
    :param pretext_model_name: (String) The name of the pretext model the target model was trained on.
    :param pretext_model_xFold: (Integer) The xFold of the pretext model the target model was trained on.
    :return: (Tuple of numpy arrays) (pretext_steps, best_epochs, best_steps, best_losses, best_obj_losses,
             best_reg_losses, best_loss_accs, best_accs), all sorted by pretext step.
             None if no text file was found or if one of the files could not be read completely.
    """
    dir_path = path + "/" + target_model_name + "/" + dataset_name +  "/" + pretext_model_name + "/loadedxFoldCrossVal_" + str(pretext_model_xFold)
    text_files = glob.glob(dir_path+'/checkpoint_*/txtlogs/eval.txt', recursive=True)
    if not text_files:
        print("No textfile of " + str(target_model_name) + " for " + str(pretext_model_name) + "found.")
        return None

    pretext_steps=[]
    best_epochs=[]
    best_steps=[]
    best_losses=[]
    best_obj_losses=[]
    best_reg_losses=[]
    best_loss_accs=[]
    best_accs = []
    for file in text_files:
        # Use the last "/checkpoint_" so that the same text earlier in the path cannot
        # be mistaken for the checkpoint dir matched by the glob pattern.
        pretext_steps.append(int(file.rsplit("/checkpoint_", 1)[1].split("/")[0]))

        stats = _parseTargetModelEvalTxtFile(file)
        if stats is None:
            print("Textfile " + str(pretext_steps[-1]) + " could not be read")
            return None

        best_epochs.append(stats["epoch"])
        best_steps.append(stats["step"])
        best_losses.append(stats["loss"])
        best_obj_losses.append(stats["obj_loss"])
        best_reg_losses.append(stats["reg_loss"])
        best_loss_accs.append(stats["loss_acc"])
        best_accs.append(stats["acc"])

    # glob gives no ordering guarantee, so everything is sorted by pretext step once at the end.
    sort_index = np.argsort(pretext_steps)

    return np.array(pretext_steps)[sort_index], np.array(best_epochs)[sort_index], np.array(best_steps)[sort_index], np.array(best_losses)[sort_index], np.array(best_obj_losses)[sort_index], np.array(best_reg_losses)[sort_index], np.array(best_loss_accs)[sort_index], np.array(best_accs)[sort_index]

In [1]:
def loadTargetModelXFoldDataFromEvalTxtFile(path, target_model_name, dataset_name, pretext_model_name, pretext_model_xFolds):
    """
    Loads the stats of the target models trained on every specified pretext training step for the given xFolds and dataset.

    :param path: (String) The path from which to get the stats.
    :param target_model_name: (String) The name of the target model for which to get the data for every trained checkpoint.
    :param dataset_name: (String) The name of the dataset for which to get the data for every trained checkpoint.
    :param pretext_model_name: (String) The name of the pretext model the target model was trained on.
    :param pretext_model_xFolds: (Array of Integer) The xFolds of the pretext model the target model was trained on.
    :return: (Tuple of lists) (pretext_steps, best_epochs, best_steps, best_losses, best_obj_losses, best_reg_losses,
             best_loss_accs, best_accs), each with one entry per xFold.
             None if the stats of an xFold could not be loaded or if the pretext steps of two
             consecutive xFolds differ.
    """
    pretext_steps_per_xFold =[]
    best_epochs_per_xFold=[]
    best_steps_per_xFold=[]
    best_losses_per_xFold=[]
    best_obj_losses_per_xFold=[]
    best_reg_losses_per_xFold=[]
    best_loss_accs_per_xFold=[]
    best_accs_per_xFold=[]

    for xFold in pretext_model_xFolds:
        xFold_stats = loadTargetModelEvalTxtFile(path, target_model_name, dataset_name, pretext_model_name, xFold)
        if xFold_stats is None:
            # The loader has already printed the reason; hand its None on instead of
            # failing while unpacking it.
            return None
        pretext_steps, best_epochs, best_steps, best_losses, best_obj_losses, best_reg_losses, best_loss_accs, best_accs = xFold_stats

        # array_equal also copes with step arrays of different length, where an
        # elementwise comparison followed by .all() would fail instead of reporting.
        if pretext_steps_per_xFold and not np.array_equal(pretext_steps_per_xFold[-1], pretext_steps):
            print("Error xFolds lens do not match! ")
            return None

        pretext_steps_per_xFold.append(pretext_steps)
        best_epochs_per_xFold.append(best_epochs)
        best_steps_per_xFold.append(best_steps)
        best_losses_per_xFold.append(best_losses)
        best_obj_losses_per_xFold.append(best_obj_losses)
        best_reg_losses_per_xFold.append(best_reg_losses)
        best_loss_accs_per_xFold.append(best_loss_accs)
        best_accs_per_xFold.append(best_accs)

    return pretext_steps_per_xFold, best_epochs_per_xFold, best_steps_per_xFold, best_losses_per_xFold, best_obj_losses_per_xFold, best_reg_losses_per_xFold, best_loss_accs_per_xFold, best_accs_per_xFold

In [1]:
def loadTargetModelTextEvalData(data_path, target_model_name, dataset_name, pretext_model_names, pretext_model_xFolds, steps_per_epoch):
    """
    Gathers the text-file stats of the target model for several pretext models.

    :param data_path: (String) The path from which to get the data.
    :param target_model_name: (String) The name of the target model for which to get the best loss stats for every trained checkpoint.
    :param dataset_name: (String) The name of the dataset for which to get the stats for every trained checkpoint.
    :param pretext_model_names: (Array of Strings) The names of the pretext models the target model was trained on.
    :param pretext_model_xFolds: (Array of Integer) The xFolds of the pretext models the target model was trained on.
    :param steps_per_epoch: (Integer) The number of steps in an epoch, used to convert the pretext steps to epochs.
    :return: (Tuple of lists) (epochs, best_epochs, best_steps, best_losses, best_obj_losses, best_reg_losses,
             best_loss_accs, best_accs), each with one per-xFold array for every pretext model.
    """
    # One result list per returned quantity; index 0 holds the pretext epochs.
    collected = [[] for _ in range(8)]

    for pretext_model_name in pretext_model_names:
        (pretext_steps,
         best_epochs,
         best_steps,
         best_losses,
         best_obj_losses,
         best_reg_losses,
         best_loss_accs,
         best_accs) = loadTargetModelXFoldDataFromEvalTxtFile(data_path, target_model_name, dataset_name, pretext_model_name, pretext_model_xFolds)

        # The pretext steps are the only quantity that is rescaled (steps -> epochs).
        collected[0].append(np.divide(pretext_steps, steps_per_epoch))

        per_xFold_stats = (best_epochs, best_steps, best_losses, best_obj_losses, best_reg_losses, best_loss_accs, best_accs)
        for target_list, stat in zip(collected[1:], per_xFold_stats):
            target_list.append(np.array(stat))

    return tuple(collected)

In [2]:
def loadTargetModelTensorboardEvalData(data_path, dataset_name, target_model_name, pretext_model_name, pretext_model_xFolds, steps_per_epoch, loss_enty_tag ="loss", obj_loss_enty_tag = "obj_loss", reg_loss_enty_tag = "reg_loss", acc_enty_tag = None):
    """
    Collects the tensorboard evaluation losses of the target model for every pretext checkpoint.

    The checkpoints are discovered in the loadedxFoldCrossVal_0 dir below
    data_path/target_model_name/dataset_name/pretext_model_name.

    :param data_path: (String) The path from which to get the data.
    :param dataset_name: (String) The name of the dataset for which to get the data.
    :param target_model_name: (String) The name of the target model for which to get the data.
    :param pretext_model_name: (String) The name of the pretext model the target model was trained on.
    :param pretext_model_xFolds: (Array of Integers) The xFolds of the pretext model the target model was trained on.
    :param steps_per_epoch: (Integer) The number of steps in an epoch.
    :param loss_enty_tag: (String): The tag of the overall losses in the tensorboard data.
    :param obj_loss_enty_tag: (String): The tag of the objective losses in the tensorboard data.
    :param reg_loss_enty_tag: (String): The tag of the regularization losses in the tensorboard data.
    :param acc_enty_tag: (String): The tag of the accuracies in the tensorboard data.
    :return: (Dictionary) Maps the pretext epoch of a checkpoint to a dictionary with the lists
             "xFold_epochs" and "xFold_losses". Only the data of the first xFold is stored in them.
    """
    model_folder_path = "/".join([data_path, target_model_name, dataset_name, pretext_model_name])
    dir_prefix = model_folder_path + "/loadedxFoldCrossVal_"

    found_eval_dirs = glob.glob(model_folder_path + "/loadedxFoldCrossVal_0/checkpoint_*/logs/eval", recursive=True)
    # Keep only what follows "/checkpoint" (e.g. "_<step>/logs/eval"), in reversed glob order.
    checkpoint_suffixes = [eval_dir.split("/checkpoint")[1] for eval_dir in reversed(found_eval_dirs)]

    trained_models_per_epoch = {}
    for dir_suffix in checkpoint_suffixes:
        checkpoint_step = int(dir_suffix.split("_")[1].split("/")[0])
        pretext_model_epoch = checkpoint_step/steps_per_epoch

        steps_per_xFold, losses_per_xFold, _, _, _ = loadModelTensorboardData(
            dir_prefix,
            "/checkpoint"+dir_suffix,
            pretext_model_xFolds,
            loss_enty_tag=loss_enty_tag,
            obj_loss_enty_tag=obj_loss_enty_tag,
            reg_loss_enty_tag=reg_loss_enty_tag,
            acc_enty_tag=acc_enty_tag,
        )

        # Create the epoch entry and its two lists on first use.
        epoch_entry = trained_models_per_epoch.setdefault(pretext_model_epoch, {})
        xFold_epochs = epoch_entry.setdefault("xFold_epochs", [])
        xFold_losses = epoch_entry.setdefault("xFold_losses", [])

        xFold_epochs.append(np.divide(steps_per_xFold,steps_per_epoch)[0])
        xFold_losses.append(losses_per_xFold[0])

    return trained_models_per_epoch

In [None]:
def printMeanMinMax(prefix, name, values):
    """
    Helper function for debugging.

    Prints the maximum, minimum and mean of the values, followed by the distance of
    the maximum and of the minimum to the mean. All numbers are rounded to 2 decimals.

    :param prefix: (String) The text printed in front of the stats.
    :param name: The name of the printed quantity.
    :param values: The values for which to print the stats.
    """
    max_value = np.max(values)
    min_value = np.min(values)
    mean_value = np.mean(values)

    # (label, number) pairs in the order in which they are printed.
    columns = [
        ("Max ", max_value),
        ("Min ", min_value),
        ("Mean ", mean_value),
        ("+ ", max_value - mean_value),
        ("- ", min_value - mean_value),
    ]

    message = prefix
    for label, number in columns:
        message = message + " | " + label + str(name) + ": " + str(np.around(number, 2))
    print(message)