In [1]:
import os
import numpy as np
import pandas as pd
import functools as ft

from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

from os.path import dirname as up
# Absolute path of the current working directory; later cells resolve the
# log folders and CSV files relative to its parent via up(path_cur).
path_cur = os.path.abspath(os.curdir)

In [2]:
def get_train_loss(all_train_loss, epochs):
    """Collapse per-step training losses into one mean value per epoch.

    The values are split into ``epochs`` consecutive groups of
    ``len(all_train_loss) // epochs`` items each. Trailing values that do not
    fill a whole group are ignored.

    Args:
        all_train_loss: list of per-step loss values, in recording order.
        epochs: number of epochs the values span; must be a positive integer.

    Returns:
        A list holding one single-element list per epoch, e.g.
        ``[[m0], [m1], [m2]]``, where ``m_i`` is the mean of group ``i``.

    Raises:
        AssertionError: if ``all_train_loss`` is not a list.
        ValueError: if ``epochs`` is not positive, or there are fewer loss
            values than epochs (the mean of an empty group is undefined).
    """
    assert isinstance(all_train_loss, list)

    if epochs <= 0:
        raise ValueError("epochs must be a positive integer, got {!r}".format(epochs))

    # Integer division avoids the float round-trip of int(len / epochs).
    steps = len(all_train_loss) // epochs
    if steps == 0:
        raise ValueError(
            "need at least one loss value per epoch: got {} values for {} epochs".format(
                len(all_train_loss), epochs
            )
        )

    # Mean (not sum) of each epoch's slice, as the function is documented to do.
    return [
        [np.mean(all_train_loss[i * steps:(i + 1) * steps])]
        for i in range(epochs)
    ]


def tabulate_events(dpath):
    """Collect every scalar series except 'training loss' from an event log.

    Args:
        dpath: location handed to ``EventAccumulator``.

    Returns:
        A tuple ``(out, steps)``:
        * ``out`` maps each scalar tag to a list with one entry per recorded
          event; every entry is a list holding the event value of each
          accumulator (currently exactly one accumulator is created).
        * ``steps`` holds the step numbers of the last tag processed, or is
          empty when no tag was processed.
    """
    # Relies on Reload() handing back the accumulator it was called on.
    accumulators = [EventAccumulator(dpath).Reload()]
    reference = accumulators[0]

    tags = reference.Tags()['scalars']

    # Every accumulator has to expose the same scalar tags.
    for accumulator in accumulators:
        assert accumulator.Tags()['scalars'] == tags

    out = defaultdict(list)
    steps = []

    for tag in tags:
        if tag == 'training loss':
            continue

        steps = [event.step for event in reference.Scalars(tag)]

        series_per_accumulator = [accumulator.Scalars(tag) for accumulator in accumulators]
        for aligned_events in zip(*series_per_accumulator):
            # Events at the same position must belong to the same step.
            assert len({event.step for event in aligned_events}) == 1

            out[tag].append([event.value for event in aligned_events])

    return out, steps


def to_csv(dpath):
    """Write one CSV file per scalar tag found in the event log at ``dpath``.

    Each file is written to the path returned by ``get_file_path(dpath, tag)``.
    Rows are indexed by the step list returned by ``tabulate_events`` and the
    columns are labelled with the paths of the files found under ``dpath``.

    Args:
        dpath: directory that is walked for files and passed to
            ``tabulate_events``.

    Notes:
        ``tabulate_events`` returns a single step list (that of the last tag it
        processed), so every tag is expected to have that many events, and the
        number of files found must match the number of value columns.
        Nothing is written when no scalar tags are found.
    """
    # Paths of the files under dpath become the column labels.
    dirs = list()
    for depth, (root, subdirs, files) in enumerate(os.walk(dpath)):
        if depth == 0:
            # get_file_path writes this function's output into dpath/csv.
            # Prune that folder so a second run does not count earlier CSV
            # files as inputs and break the column count.
            subdirs[:] = [name for name in subdirs if name != 'csv']
        for file in files:
            file_path = os.path.join(root, file)
            if not file_path.endswith('.DS_Store'):
                dirs.append(file_path)

    d, steps = tabulate_events(dpath)

    # Convert tag by tag: this is a no-op for an empty result and does not
    # require all tags to have the same number of events in one big array.
    for tag, values in d.items():
        df = pd.DataFrame(np.asarray(values), index=steps, columns=dirs)
        df.to_csv(get_file_path(dpath, tag))


def get_file_path(dpath, tag):
    """Return the CSV path for ``tag`` inside ``dpath/csv``, creating the folder.

    Args:
        dpath: base directory; the file goes into its ``csv`` subfolder.
        tag: scalar tag name; every ``/`` is replaced by ``_`` so the tag
            becomes a single file name.

    Returns:
        ``<dpath>/csv/<tag with '/' replaced by '_'>.csv``.
    """
    file_name = tag.replace("/", "_") + '.csv'
    folder_path = os.path.join(dpath, 'csv')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_path, exist_ok=True)
    return os.path.join(folder_path, file_name)


def get_df(pathlist, dname):
    """Merge the last recorded scalars of three event logs into one frame.

    For every path the scalars are read with ``tabulate_events`` and only the
    last recorded entry of each tag is kept. A log with exactly one tag is
    stored under ``'train_loss'`` when the path contains ``'Train'`` and under
    ``'test_loss'`` when it contains ``'Test'`` (checked in that order, against
    the whole path string). In every other case the tag names themselves are
    used as column names.

    Args:
        pathlist: list of exactly three log paths.
        dname: value written to the ``'dataset_model'`` column of each
            per-path frame; that column is the merge key.

    Returns:
        The per-path frames merged on ``'dataset_model'``.

    Raises:
        AssertionError: if ``pathlist`` is not a list of length 3.
    """
    assert isinstance(pathlist, list)
    assert len(pathlist) == 3

    dfs = list()
    for path in pathlist:
        # Choose the label afresh for every path, so one picked for an
        # earlier path can never leak into a path matching neither keyword.
        if 'Train' in path:
            new_col = 'train_loss'
        elif 'Test' in path:
            new_col = 'test_loss'
        else:
            new_col = None

        out, _ = tabulate_events(path)
        columns = list(out.keys())

        if len(columns) == 1 and new_col is not None:
            outdict = {new_col: out[columns[0]][-1]}
        else:
            # Several tags, or a single tag from an unrecognised path:
            # keep the tag names as column names.
            outdict = {column: out[column][-1] for column in columns}

        df = pd.DataFrame.from_dict(outdict)
        df['dataset_model'] = dname
        dfs.append(df)

    df_final = ft.reduce(lambda left, right: pd.merge(left, right, on='dataset_model'), dfs)

    return df_final  # dataframe


def get_all_files(alldir):
    """Map every run folder directly under ``alldir`` to the files it contains.

    Entries of ``alldir`` that are not directories, or whose name ends with
    ``'checkpoints'``, are skipped. Each remaining folder is walked
    recursively and every file path that does not end with ``'.DS_Store'`` is
    collected.

    Args:
        alldir: directory whose immediate subfolders are scanned.

    Returns:
        dict mapping folder name -> list of file paths found below it
        (an empty list when the folder holds no matching files).
    """
    allpaths = {}

    for entry in os.listdir(alldir):
        run_dir = os.path.join(alldir, entry)
        if entry.endswith('checkpoints') or not os.path.isdir(run_dir):
            continue

        allpaths[entry] = [
            os.path.join(root, name)
            for root, _subdirs, names in os.walk(run_dir)
            for name in names
            if not os.path.join(root, name).endswith('.DS_Store')
        ]

    return allpaths


def get_all_dfs(allpaths, mode='backbone'):
    """Build one summary table covering every run in ``allpaths``.

    Args:
        allpaths: mapping of run name -> list of log file paths; each pair is
            passed to ``get_df``.
        mode: ``'backbone'`` takes the backbone from the second
            ``_``-separated field of the run name and the dataset from the
            fourth field onward. Any other value labels the backbone as
            ``'Unet'`` and takes the dataset from the third field onward.

    Returns:
        The per-run frames stacked row-wise with a fresh 0..n-1 index, plus
        the columns ``resolution`` (last ``_``-separated field of the run
        name), ``backbone`` and ``dataset``.
    """
    per_run_frames = [get_df(paths, run_name) for run_name, paths in allpaths.items()]
    sum_df = pd.concat(per_run_frames, axis=0)

    # Split each run name once and derive all three columns from the fields.
    name_fields = sum_df['dataset_model'].apply(lambda name: name.split('_'))
    sum_df['resolution'] = name_fields.apply(lambda fields: fields[-1])

    if mode == 'backbone':
        dataset_start = 3
        sum_df['backbone'] = name_fields.apply(lambda fields: fields[1])
    else:
        dataset_start = 2
        sum_df['backbone'] = 'Unet'

    sum_df['dataset'] = name_fields.apply(lambda fields: '_'.join(fields[dataset_start:]))

    return sum_df.reset_index(drop=True)

In [3]:
import glob

# Both log folders sit in the parent of the working directory.
logs_dir, logs_backbone_dir = [
    os.path.join(up(path_cur), folder_name)
    for folder_name in ('logs_paper', 'logs_backbone_paper')
]

# Run-folder name -> list of log file paths, for each of the two log folders.
logs_files, logs_backbone_files = [
    get_all_files(folder) for folder in (logs_dir, logs_backbone_dir)
]

In [5]:
# mode='None' is simply a value other than 'backbone': it selects the branch
# of get_all_dfs that labels every run with the 'Unet' backbone.
logs_df = get_all_dfs(logs_files, mode='None')
logs_backbone_df = get_all_dfs(logs_backbone_files, mode='backbone')

# Stack both tables and renumber the rows 0..n-1.
all_logs_df = pd.concat([logs_df, logs_backbone_df], axis=0).reset_index(drop=True)

In [6]:
# Show the combined table built from both log folders.
all_logs_df

Unnamed: 0,Precision/test macroPrec,Precision/test microPrec,Precision/test weightPrec,Recall/test macroRec,Recall/test microRec,Recall/test weightRec,F1/test macroF1,F1/test microF1,F1/test weightF1,IoU/test MacroIoU,dataset_model,test_loss,train_loss,resolution,backbone,dataset
0,0.643053,0.653089,0.670298,0.699998,0.653089,0.653089,0.656172,0.653089,0.656695,0.507965,unet_0.002_Image_allyear_merged_256,0.716299,0.68266,256,Unet,Image_allyear_merged_256
1,0.552445,0.598994,0.605654,0.578904,0.598994,0.598994,0.559918,0.598994,0.598999,0.399987,unet_0.002_Image_after_2010_merged_512,0.867193,0.825471,512,Unet,Image_after_2010_merged_512
2,0.664268,0.698909,0.710225,0.67599,0.698909,0.698909,0.668122,0.698909,0.70312,0.523301,unet_0.002_Image_after_2010_merged_256,0.658505,0.658191,256,Unet,Image_after_2010_merged_256
3,0.5351,0.599492,0.615045,0.564539,0.599492,0.599492,0.53977,0.599492,0.605662,0.383416,unet_0.002_Image_allyear_merged_512,0.904631,0.833976,512,Unet,Image_allyear_merged_512
4,0.536507,0.698043,0.696549,0.566809,0.698043,0.698043,0.530698,0.698043,0.69394,0.395852,unet_0.002_Image_after_2010_VA_512,0.815415,0.731571,512,Unet,Image_after_2010_VA_512
5,0.56738,0.588985,0.621071,0.571398,0.588985,0.588985,0.549881,0.588985,0.598032,0.386448,unet_0.002_Image_after_2010_VA_256,0.901397,0.662959,256,Unet,Image_after_2010_VA_256
6,0.483936,0.695874,0.698348,0.533214,0.695874,0.695874,0.503942,0.695874,0.695886,0.380883,unet_0.002_Image_allyear_VA_512,0.845177,0.716052,512,Unet,Image_allyear_VA_512
7,0.494995,0.602631,0.604974,0.523191,0.602631,0.602631,0.49756,0.602631,0.600295,0.349212,unet_0.002_Image_allyear_VA_256,0.989898,0.834894,256,Unet,Image_allyear_VA_256
8,0.570245,0.650197,0.655369,0.575504,0.650197,0.650197,0.570162,0.650197,0.652118,0.410908,unet_resnet18_0.002_Image_allyear_VA_256,1.146997,0.896222,256,resnet18,Image_allyear_VA_256
9,0.542669,0.707194,0.716202,0.526689,0.707194,0.707194,0.530967,0.707194,0.711234,0.407183,unet_resnet18_0.002_Image_allyear_VA_512,1.065666,0.854239,512,resnet18,Image_allyear_VA_512


In [7]:
# Load the pixel count statistics CSV from the parent of the working directory.
# It is joined to the model results on the 'dataset' column in a later cell.

pixel_count_statistics_path = os.path.join(up(path_cur), "pixel_count_statistics.csv")
pixel_sts_df = pd.read_csv(pixel_count_statistics_path)

In [8]:
# Cast every column of both frames to str before they are merged.
# NOTE(review): presumably done so the 'dataset' join key has the same dtype on
# both sides; it also turns the numeric metric columns into strings — confirm
# that is intended.
pixel_sts_df = pixel_sts_df.astype(str)
all_logs_df = all_logs_df.astype(str)

In [11]:

# Left join: keep every model run and attach the pixel statistics whose
# 'dataset' value matches; runs without a match get missing values there.
allstatedf = all_logs_df.merge(pixel_sts_df, on='dataset', how='left')


In [12]:
# Save the merged table as a comma-separated UTF-8 file in the parent of the
# working directory, without the row index.
allstatedf.to_csv(os.path.join(up(path_cur), 'model_results_sum.csv'), encoding='utf-8', sep=',', index=False)

In [None]:
# Show the merged results-plus-pixel-statistics table.
allstatedf